# Machine-specific absolute path. Every relative file name read below
# (the .rds and .xlsx inputs) is resolved against this directory, so it
# must be edited before the script is run on another machine.
setwd("/Users/leonardocarella/Documents/LSQ_27092023") # change to local wd
# Infix complement of `%in%`: TRUE for every element of `x` that does not
# occur in `table`. Like `%in%`, it never returns NA.
`%notin%` <- function(x, table) {
  !(x %in% table)
}
library(tidyverse)
library(readxl)
library(lubridate)
library(lmtest)
library(multiwayvcov)
library(stargazer)
library(sandwich)
library(dfidx)
library(mlogit)
library(patchwork)
library(xtable)
library(ggh4x)
library(gamlss)

# Produces main analysis and robustness check.

#### Get data ####

# Election days of the 11th-20th Bundestag elections, stored as "dd-mm-yyyy"
# strings. They are parsed with lubridate::dmy() wherever a date is needed
# (age at election, time since the last state election, and the lookup of
# the state government in office on election day).
election_11_bundestag <- "25-01-1987"
election_12_bundestag <- "02-12-1990"
election_13_bundestag <- "16-10-1994"
election_14_bundestag <- "27-09-1998"
election_15_bundestag <- "22-09-2002"
election_16_bundestag <- "18-09-2005"
election_17_bundestag <- "27-09-2009"
election_18_bundestag <- "22-09-2013"
# Fixed: the 2017 federal election took place on 24 September 2017 (a Sunday,
# like every other date in this list); the previous value "14-09-2017" was a
# Thursday and shifted all date-derived variables for election 19 by ten days.
election_19_bundestag <- "24-09-2017"
election_20_bundestag <- "26-09-2021"

# State-legislator table. The code below filters it on per-election indicator
# columns (`party_ran_<n>_fed_election`, `in_landtag_for_<n>_fed_election`,
# `ran_for_<n>_fed_election`, `in_bundestag_<n>_fed_election`) and also reads
# `state`, `dob`, `no_past_terms`, `legislature`, `party_elected_with`,
# `elected_from`, `election_date` and `id`.
landtag <- readRDS("landtag_18_12.rds")
# NOTE(review): `bundestag` is not referenced in the part of the script
# visible here; presumably it is used further down. Confirm before removing.
bundestag <- readRDS("bundestag_2010.rds")

# One candidacy table per federal election (1987-2021, elections 11-20).
# They are used below to look up each candidate's constituency (`Wkr`) and
# list position (`Platz`; `Liste_Platz` in the 2017 file) via the
# `landtag_id1` / `landtag_id2` link columns.
candidacies87 <- readRDS("candidacies87_clean_anon.rds")
candidacies90 <- readRDS("candidacies90_clean_anon.rds")
candidacies94 <- readRDS("candidacies94_clean_anon.rds")
candidacies98 <- readRDS("candidacies98_clean_anon.rds")
candidacies02 <- readRDS("candidacies02_clean_anon.rds")
candidacies05 <- readRDS("candidacies05_clean_anon.rds") 
candidacies09 <- readRDS("candidacies09_clean_anon.rds")
candidacies13 <- readRDS("candidacies13_clean_anon.rds")
candidacies17 <- readRDS("candidacies17_clean_anon.rds")
candidacies21 <- readRDS("candidacies21_clean_anon.rds")

# State government composition, read from the "land_govt_composition" sheet.
# Later code treats the first three columns as identifiers (it uses `state`,
# `start` and `end`) and every remaining column as a per-party indicator.
landesregierung <- read_excel(path = "landesregierung.xlsx",
                              sheet = "land_govt_composition")

# State election results in long format: one row per state, legislature and
# party, holding that party's vote share.
# NOTE(review): the value column is called `share_last_fed_election` although
# it is read from the state ("land") election sheet; the name is kept because
# downstream code may rely on it.
landtagswahlen <- local({
  results_wide <- read_excel(path = "landesregierung.xlsx",
                             sheet = "land_election_results")

  # Everything except columns 1-3 and 23 is pivoted into (party, share) pairs.
  party_cols <- colnames(results_wide)[-c(1, 2, 3, 23)]

  results_wide %>%
    dplyr::filter(type == "Share") %>%
    pivot_longer(cols = all_of(party_cols),
                 names_to = "party",
                 values_to = "share") %>%
    dplyr::filter(party != "no_seats_voter_turnout") %>%
    dplyr::select(state,
                  legislature,
                  party_elected_with = party,
                  share_last_fed_election = share)
})


#### Subset dataset to members present prior to federal election ####

# Build the analysis rows for a single federal election.
#
# Arguments:
#   fed_election_no    Number of the Bundestag election (11-20). It selects the
#                      `*_<no>_fed_election` indicator columns of `members`
#                      and is stored in `which_federal_election`.
#   fed_election_date  Election day as a "dd-mm-yyyy" string.
#   excluded_states    State codes whose members are dropped (default: none).
#   members            State-legislator table (defaults to `landtag`).
#   governments        State government table (defaults to `landesregierung`);
#                      columns 1-3 are identifiers including `state`, `start`
#                      and `end`, the remaining columns are party indicators.
#   state_results      Long state-election results (defaults to
#                      `landtagswahlen`).
#
# Returns the rows of `members` whose party ran in that federal election and
# who sat in the state parliament at the time, with these columns added:
# `ran`, `moved_up`, `age` (years on election day), `pty_in_govt` (1 if the
# member's party was in the state government on election day, else 0),
# `seniority`, the party's state-election share, and `which_federal_election`.
build_landtag_subset <- function(fed_election_no,
                                 fed_election_date,
                                 excluded_states = character(0),
                                 members = landtag,
                                 governments = landesregierung,
                                 state_results = landtagswahlen) {
  party_ran_col <- paste0("party_ran_", fed_election_no, "_fed_election")
  in_landtag_col <- paste0("in_landtag_for_", fed_election_no, "_fed_election")
  ran_col <- paste0("ran_for_", fed_election_no, "_fed_election")
  in_bundestag_col <- paste0("in_bundestag_", fed_election_no, "_fed_election")
  fed_election_day <- dmy(fed_election_date)

  sitting_members <- members %>%
    dplyr::filter(.data[[party_ran_col]] == 1 &
                    .data[[in_landtag_col]] == 1)

  if (length(excluded_states) > 0) {
    # Rows with a missing state are dropped too, exactly as a plain
    # `state != "<code>"` filter would do.
    sitting_members <- sitting_members %>%
      dplyr::filter(!is.na(state) & !(state %in% excluded_states))
  }

  sitting_members <- sitting_members %>%
    dplyr::mutate(ran = .data[[ran_col]],
                  moved_up = .data[[in_bundestag_col]],
                  age = time_length(difftime(fed_election_day, ymd(dob)),
                                    "years"))

  # Parties that were part of each state's government on federal election day:
  # one row per (state, party) whose indicator equals 1.
  governing_parties <- governments %>%
    dplyr::filter(fed_election_day %within% interval(start, end)) %>%
    pivot_longer(colnames(governments)[-c(1, 2, 3)]) %>%
    dplyr::filter(value == 1) %>%
    dplyr::rename(party_elected_with = name,
                  pty_in_govt = value) %>%
    dplyr::select(-c(start, end))

  sitting_members %>%
    left_join(governing_parties, by = c("state", "party_elected_with")) %>%
    dplyr::mutate(seniority = as.numeric(no_past_terms) + 1,
                  # No match in the government table means "in opposition".
                  pty_in_govt = replace_na(pty_in_govt, 0),
                  legislature = as.numeric(as.character(legislature))) %>%
    left_join(state_results,
              by = c("state", "legislature", "party_elected_with")) %>%
    dplyr::mutate(which_federal_election = fed_election_no)
}

#Exclude Berlin from 11th legislature
landtag_subset11 <- build_landtag_subset(11, election_11_bundestag,
                                         excluded_states = "BE")
landtag_subset12 <- build_landtag_subset(12, election_12_bundestag)
landtag_subset13 <- build_landtag_subset(13, election_13_bundestag)
landtag_subset14 <- build_landtag_subset(14, election_14_bundestag)
landtag_subset15 <- build_landtag_subset(15, election_15_bundestag)
landtag_subset16 <- build_landtag_subset(16, election_16_bundestag)
landtag_subset17 <- build_landtag_subset(17, election_17_bundestag)
landtag_subset18 <- build_landtag_subset(18, election_18_bundestag)
landtag_subset19 <- build_landtag_subset(19, election_19_bundestag)
landtag_subset20 <- build_landtag_subset(20, election_20_bundestag)

# Create dataset of state legislators in office at the time of each federal election
landtag_subset <- rbind.data.frame(landtag_subset11, landtag_subset12,
                                   landtag_subset13, landtag_subset14,
                                   landtag_subset15, landtag_subset16,
                                   landtag_subset17, landtag_subset18,
                                   landtag_subset19, landtag_subset20)

#### Code independent variables #### 

# Electoral tier indicator: members elected as "proportional" or "substitute"
# are coded "list" (all substitutes are elected through the lists); every
# other member is coded "SMD".
landtag_subset$tier_dummy <- ifelse(
  landtag_subset$elected_from %in% c("proportional", "substitute"),
  "list",
  "SMD"
)

# Days between the state election recorded in `election_date` and the federal
# election each row belongs to (`which_federal_election`), stored as a plain
# numeric column.
landtag_subset$time_since_last_election <- local({
  federal_election_day <- c(
    "11" = election_11_bundestag,
    "12" = election_12_bundestag,
    "13" = election_13_bundestag,
    "14" = election_14_bundestag,
    "15" = election_15_bundestag,
    "16" = election_16_bundestag,
    "17" = election_17_bundestag,
    "18" = election_18_bundestag,
    "19" = election_19_bundestag,
    "20" = election_20_bundestag
  )

  # Look up the federal election day for every row by election number.
  row_election_day <- unname(
    federal_election_day[as.character(landtag_subset$which_federal_election)]
  )

  # Date - Date yields a difftime in days; keep only the number of days.
  as.numeric(dmy(row_election_day) - dmy(landtag_subset$election_date))
})

# Preferential voting in the PR tier: `pv` is 1 for list members
# ("proportional" or "substitute") of state "BY" in any legislature, of "HH"
# in legislatures 20-22, and of "HB" in legislatures 18-20; 0 otherwise.
landtag_subset$pv <- with(landtag_subset, {
  list_member <- elected_from %in% c("proportional", "substitute")
  # `%in%` rather than `==` so that a missing state yields FALSE, not NA.
  pv_rules_apply <- state %in% "BY" |
    (state %in% "HH" & legislature %in% c(20, 21, 22)) |
    (state %in% "HB" & legislature %in% c(18, 19, 20))
  as.numeric(list_member & pv_rules_apply)
})



#### Create dataset of legislators who ran for each election, with associated state list position and wahlkreis ####

# Placeholder columns; they are filled candidate by candidate further below.
landtag_subset[["wk"]] <- NA
landtag_subset[["listplace"]] <- NA

# Distinct ids of the legislators in `landtag_subset` who ran (`ran == 1`)
# in the given federal election.
ids_of_runners <- function(election_no) {
  in_election <- landtag_subset$which_federal_election == election_no
  unique(landtag_subset$id[in_election & landtag_subset$ran == 1])
}

cands_11 <- ids_of_runners(11)
cands_12 <- ids_of_runners(12)
cands_13 <- ids_of_runners(13)
cands_14 <- ids_of_runners(14)
cands_15 <- ids_of_runners(15)
cands_16 <- ids_of_runners(16)
cands_17 <- ids_of_runners(17)
cands_18 <- ids_of_runners(18)
cands_19 <- ids_of_runners(19)

for (i in cands_11)
{
  wahlkreis <- unique(candidacies87$Wkr[candidacies87$landtag_id1 == i | 
                                          candidacies87$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 11] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 11] <- "List Only"
  }
  
  listenplatz <- unique(candidacies87$Platz[candidacies87$landtag_id1 == i | 
                                              candidacies87$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 11] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 11] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 11]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 11]

for (i in cands_12)
{
  wahlkreis <- unique(candidacies90$Wkr[candidacies90$landtag_id1 == i | 
                                          candidacies90$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 12] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 12] <- "List Only"
  }
  
  listenplatz <- unique(candidacies90$Platz[candidacies90$landtag_id1 == i | 
                                              candidacies90$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 12] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 12] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 12]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 12]

for (i in cands_13)
{
  wahlkreis <- unique(candidacies94$Wkr[candidacies94$landtag_id1 == i | 
                                          candidacies94$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 13] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 13] <- "List Only"
  }
  
  listenplatz <- unique(candidacies94$Platz[candidacies94$landtag_id1 == i | 
                                              candidacies94$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 13] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 13] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 13]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 13]



for (i in cands_14)
{
  wahlkreis <- unique(candidacies98$Wkr[candidacies98$landtag_id1 == i | 
                                          candidacies98$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 14] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 14] <- "List Only"
  }
  
  listenplatz <- unique(candidacies98$Platz[candidacies98$landtag_id1 == i | 
                                              candidacies98$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 14] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 14] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 14]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 14]


for (i in cands_15)
{
  wahlkreis <- unique(candidacies02$Wkr[candidacies02$landtag_id1 == i | 
                                          candidacies02$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 15] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 15] <- "List Only"
  }
  
  listenplatz <- unique(candidacies02$Platz[candidacies02$landtag_id1 == i | 
                                              candidacies02$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 15] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 15] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 15]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 15]

for (i in cands_16)
{
  wahlkreis <- unique(candidacies05$Wkr[candidacies05$landtag_id1 == i | 
                                          candidacies05$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 16] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 16] <- "List Only"
  }
  
  listenplatz <- unique(candidacies05$Platz[candidacies05$landtag_id1 == i | 
                                              candidacies05$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 16] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 16] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 16]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 16]


for (i in cands_17)
{
  wahlkreis <- unique(candidacies09$Wkr[candidacies09$landtag_id1 == i | 
                                          candidacies09$landtag_id2 == i])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 17] <- wahlkreis[!is.na(wahlkreis)]
  }
  if (length(wahlkreis) == 1){
    landtag_subset$wk[landtag_subset$id == i & landtag_subset$which_federal_election == 17] <- "List Only"
  }
  
  listenplatz <- unique(candidacies09$Platz[candidacies09$landtag_id1 == i | 
                                              candidacies09$landtag_id2 == i])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 17] <- listenplatz[!is.na(listenplatz)]
  }
  if (length(listenplatz) == 1){
    landtag_subset$listplace[landtag_subset$id == i & landtag_subset$which_federal_election == 17] <- "SMD Only"
  }
}

landtag_subset$listplace[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 17]
landtag_subset$wk[landtag_subset$ran == 1 & landtag_subset$which_federal_election == 17]


# Record, for every state MP in cands_18, the district (wk) and list position
# (listplace) of their candidacy as found in candidacies13.
for (i in cands_18) {
  # Candidacy rows tied to this legislator through either landtag id column,
  # and the legislator's own row(s) for the 18th federal election.
  cand_rows <- candidacies13$landtag_id1 == i | candidacies13$landtag_id2 == i
  mp_rows <- landtag_subset$id == i & landtag_subset$which_federal_election == 18

  # NOTE(review): the length tests presumably rely on the subset always
  # carrying an NA next to at most one real value (two unique values = NA
  # plus a district, one = NA only) -- confirm against the candidacy data.
  wahlkreis <- unique(candidacies13$Wkr[cand_rows])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[mp_rows] <- wahlkreis[!is.na(wahlkreis)]
  } else if (length(wahlkreis) == 1) {
    landtag_subset$wk[mp_rows] <- "List Only"
  }

  # Same rule applied to the list position.
  listenplatz <- unique(candidacies13$Platz[cand_rows])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[mp_rows] <- listenplatz[!is.na(listenplatz)]
  } else if (length(listenplatz) == 1) {
    landtag_subset$listplace[mp_rows] <- "SMD Only"
  }
}

# Console check of the values just written.
ran_in_18 <- landtag_subset$ran == 1 & landtag_subset$which_federal_election == 18
landtag_subset$listplace[ran_in_18]
landtag_subset$wk[ran_in_18]


# Record, for every state MP in cands_19, the district (wk) and list position
# (listplace) of their candidacy as found in candidacies17. The list position
# column in this file is called Liste_Platz.
for (i in cands_19) {
  # Candidacy rows tied to this legislator through either landtag id column,
  # and the legislator's own row(s) for the 19th federal election.
  cand_rows <- candidacies17$landtag_id1 == i | candidacies17$landtag_id2 == i
  mp_rows <- landtag_subset$id == i & landtag_subset$which_federal_election == 19

  # NOTE(review): the length tests presumably rely on the subset always
  # carrying an NA next to at most one real value (two unique values = NA
  # plus a district, one = NA only) -- confirm against the candidacy data.
  wahlkreis <- unique(candidacies17$Wkr[cand_rows])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[mp_rows] <- wahlkreis[!is.na(wahlkreis)]
  } else if (length(wahlkreis) == 1) {
    landtag_subset$wk[mp_rows] <- "List Only"
  }

  # Same rule applied to the list position.
  listenplatz <- unique(candidacies17$Liste_Platz[cand_rows])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[mp_rows] <- listenplatz[!is.na(listenplatz)]
  } else if (length(listenplatz) == 1) {
    landtag_subset$listplace[mp_rows] <- "SMD Only"
  }
}

# Console check of the values just written.
ran_in_19 <- landtag_subset$ran == 1 & landtag_subset$which_federal_election == 19
landtag_subset$listplace[ran_in_19]
landtag_subset$wk[ran_in_19]


# Record, for every state MP in cands_20, the district (wk) and list position
# (listplace) of their candidacy as found in candidacies21. The list position
# column in this file is called Listenplatz.
for (i in cands_20) {
  # Candidacy rows tied to this legislator through either landtag id column,
  # and the legislator's own row(s) for the 20th federal election.
  cand_rows <- candidacies21$landtag_id1 == i | candidacies21$landtag_id2 == i
  mp_rows <- landtag_subset$id == i & landtag_subset$which_federal_election == 20

  # NOTE(review): the length tests presumably rely on the subset always
  # carrying an NA next to at most one real value (two unique values = NA
  # plus a district, one = NA only) -- confirm against the candidacy data.
  wahlkreis <- unique(candidacies21$Wkr[cand_rows])
  if (length(wahlkreis) == 2) {
    landtag_subset$wk[mp_rows] <- wahlkreis[!is.na(wahlkreis)]
  } else if (length(wahlkreis) == 1) {
    landtag_subset$wk[mp_rows] <- "List Only"
  }

  # Same rule applied to the list position.
  listenplatz <- unique(candidacies21$Listenplatz[cand_rows])
  if (length(listenplatz) == 2) {
    landtag_subset$listplace[mp_rows] <- listenplatz[!is.na(listenplatz)]
  } else if (length(listenplatz) == 1) {
    landtag_subset$listplace[mp_rows] <- "SMD Only"
  }
}

# Console check of the values just written.
ran_in_20 <- landtag_subset$ran == 1 & landtag_subset$which_federal_election == 20
landtag_subset$listplace[ran_in_20]
landtag_subset$wk[ran_in_20]

# One row per legislator-election with a federal candidacy (ran == 1), keeping
# only the identifiers and candidacy details used to code candidacy quality.
all_candidates <- landtag_subset %>%
  dplyr::filter(ran == 1) %>%
  dplyr::select(dplyr::all_of(c(
    "entry_id", "id", "state", "party_elected_with", "person_title",
    "which_federal_election", "moved_up", "wk", "listplace"
  )))

#### Code Wahlkreis safety ####

# Binary safety: legislator's party was winner at t-1.
# Continuous safety: P(win t+1) at t-1.

# District-level results; party codes are harmonised to the labels used in
# party_elected_with.
fptp_data <- readRDS("fptp_data_germany_27092023.rds")
fptp_data <- dplyr::mutate(
  fptp_data,
  party = recode(party,
                 "AFD" = "AfD",
                 "B90" = "Grüne / B90 / AL / Neues Forum",
                 "GRU" = "Grüne / B90 / AL / Neues Forum",
                 "PIR" = "PIRATEN",
                 "NPD" = "DVU/NPD",
                 "PDS" = "LINKE/PDS",
                 "LIN" = "LINKE/PDS")
)

# The relevant federal election is t+1 relative to the election on which the
# safety measure is based: a seat's safety given its 1983 margin is the
# prospective safety for candidates running in 1987 (11th federal election).
# Years outside this list are left as NA.
smd_base_years <- c(1983, 1987, 1990, 1994, 1998, 2002, 2005, 2009, 2013, 2017)
fptp_data$which_federal_election <-
  as.numeric(11:20)[match(fptp_data$year, smd_base_years)]

# State MPs with a district candidacy, keyed by numeric district number.
smd_candidacies <- all_candidates %>%
  dplyr::filter(wk != "List Only") %>%
  dplyr::mutate(wk = as.numeric(wk)) %>%
  dplyr::select(entry_id, wk, party_elected_with, which_federal_election)

# Party-by-district results; winner flags a strictly positive margin.
smd_results <- fptp_data %>%
  dplyr::mutate(winner = as.numeric(margin > 0)) %>%
  dplyr::select(wk = wkr_t_plus_1, party_elected_with = party, winner, margin,
                prob_win_t_plus_1, which_federal_election)

# Attach the district safety measures to each candidacy and keep only the
# join keys needed to merge back into all_candidates.
chunk <- left_join(smd_candidacies, smd_results,
                   by = c("wk", "party_elected_with", "which_federal_election")) %>%
  dplyr::select(entry_id, which_federal_election,
                winner_smd_t_minus_1 = winner,
                margin_smd_t_minus_1 = margin,
                prob_win_smd = prob_win_t_plus_1)

all_candidates <- left_join(all_candidates, chunk,
                            by = c("entry_id", "which_federal_election"))

# Candidates without a district candidacy get zero district safety.
list_only <- all_candidates$wk == "List Only"
all_candidates$winner_smd_t_minus_1[list_only] <- 0
all_candidates$prob_win_smd[list_only] <- 0

# For these parties a missing district record is coded as "not the winner".
unmatched_minor <- all_candidates$party_elected_with %in%
  c("DVU/NPD", "AfD", "Freie Wähler (Bayern)") &
  is.na(all_candidates$winner_smd_t_minus_1)
all_candidates$winner_smd_t_minus_1[unmatched_minor] <- 0


#### Code List position safety ####
# Binary safety: position was eligible at t-1.
# Continuous safety: P(position is eligible) at t. 

# List-tier data; the Freie Wähler label is aligned with party_elected_with.
pr_data <- readRDS("pr_data_germany_27092023.rds")
pr_data <- dplyr::mutate(
  pr_data,
  party = recode(party, "FREIE WÄHLER" = "Freie Wähler (Bayern)")
)

# Map each election year onto the number of that federal election (1987 = 11,
# ..., 2021 = 20). Years outside this list are left as NA.
pr_election_years <- c(1987, 1990, 1994, 1998, 2002, 2005, 2009, 2013, 2017, 2021)
pr_data$which_federal_election <-
  as.numeric(11:20)[match(pr_data$year, pr_election_years)]


# State MPs with a list candidacy, keyed by numeric list position.
list_candidacies <- all_candidates %>%
  dplyr::filter(listplace != "SMD Only") %>%
  dplyr::mutate(listplace = as.numeric(listplace)) %>%
  dplyr::select(entry_id, listplace, party_elected_with, which_federal_election, state)

# Position-level list data; a position counts as eligible when it is at or
# above the last position elected at t-1.
list_positions <- pr_data %>%
  dplyr::mutate(eligible_t_minus_one = as.numeric(position <= last_elected_t_minus_1)) %>%
  dplyr::select(listplace = position, state, party_elected_with = party,
                eligible_t_minus_one, last_elected_t_minus_1, prob_win_t,
                which_federal_election, magnitude)

# Attach the list safety measures to each candidacy and keep only the join
# keys needed to merge back into all_candidates.
chunk <- left_join(list_candidacies, list_positions,
                   by = c("listplace", "party_elected_with",
                          "which_federal_election", "state")) %>%
  dplyr::select(entry_id, which_federal_election, state,
                eligible_pr_t_minus_1 = eligible_t_minus_one,
                last_elected_pr_t_minus_1 = last_elected_t_minus_1,
                prob_win_pr = prob_win_t,
                magnitude)

all_candidates <- left_join(all_candidates, chunk,
                            by = c("entry_id", "which_federal_election", "state"))

# Candidates without a list candidacy get zero list safety.
smd_only <- all_candidates$listplace == "SMD Only"
all_candidates$eligible_pr_t_minus_1[smd_only] <- 0
all_candidates$prob_win_pr[smd_only] <- 0

# Continuous measure: probability of winning the district, or failing that,
# of being elected from the list.
all_candidates$joint_prob <- with(
  all_candidates,
  prob_win_smd + (1 - prob_win_smd) * prob_win_pr
)

# Binary measure: 1 if either tier is coded secure, 0 if both are coded
# insecure. It stays NA unless BOTH indicators are coded 0/1 (a missing
# indicator is never treated as insecure, nor overridden by the other tier).
pr_secure <- all_candidates$eligible_pr_t_minus_1
smd_secure <- all_candidates$winner_smd_t_minus_1
both_coded <- pr_secure %in% c(0, 1) & smd_secure %in% c(0, 1)

all_candidates$joint_safety <- NA
all_candidates$joint_safety[both_coded] <-
  as.numeric(pr_secure[both_coded] == 1 | smd_secure[both_coded] == 1)

# Visualise Safety (figure 2 in paper)

all_candidates2 <- all_candidates

# listplace2: list position as a number (NA for "SMD Only" or missing list
# positions); listplace3: distance between the candidate's position and the
# last position elected at t-1. The mask excludes NA so the subassignment
# cannot fail on missing list positions.
has_list_place <- !is.na(all_candidates2$listplace) &
  all_candidates2$listplace != "SMD Only"
all_candidates2$listplace2 <- NA
all_candidates2$listplace2[has_list_place] <-
  as.numeric(as.character(all_candidates2$listplace[has_list_place]))
all_candidates2$listplace3 <- all_candidates2$listplace2 -
  all_candidates2$last_elected_pr_t_minus_1

# Quick base-graphics look at margin vs. continuous SMD quality.
plot(all_candidates$margin_smd_t_minus_1, all_candidates$prob_win_smd)

# Left panel: district tier. The legend is hidden here ("none" is the
# documented value) because the right panel carries the shared legend.
plot_smd <- ggplot(all_candidates2 %>% dplyr::filter(!is.na(winner_smd_t_minus_1))) +
  geom_point(mapping = aes(x = margin_smd_t_minus_1, y = prob_win_smd,
                           col = as.factor(winner_smd_t_minus_1),
                           shape = as.factor(winner_smd_t_minus_1))) +
  theme_minimal() +
  xlab("Party margin in district at t-1") +
  ylab("Continuous Candidacy Quality (SMD)") +
  scale_color_manual(values = c("#882255", "#88CCEE"), labels = c("Insecure", "Secure"),
                     name = "Categorical\nCandidacy\nQuality") +
  scale_shape_manual(values = c(16, 15), labels = c("Insecure", "Secure"),
                     name = "Categorical\nCandidacy\nQuality") +
  ggtitle("SMD Candidacy Quality, Continuous \n and Categorical Measures Compared \n Level-Hopping Attempts (1987-2021)") +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

# Right panel: list tier, restricted to candidacies with a list position.
plot_list <- ggplot(all_candidates2 %>% dplyr::filter(!is.na(listplace3))) +
  geom_point(mapping = aes(x = listplace3, y = prob_win_pr,
                           col = as.factor(eligible_pr_t_minus_1),
                           shape = as.factor(eligible_pr_t_minus_1))) +
  theme_minimal() +
  xlab("Candidate's List Position - Last List Position Elected at t-1") +
  ylab("Continuous Candidacy Quality (List)") +
  scale_color_manual(values = c("#882255", "#88CCEE"), labels = c("Insecure", "Secure"),
                     name = "Categorical\nCandidacy\nQuality") +
  scale_shape_manual(values = c(16, 15), labels = c("Insecure", "Secure"),
                     name = "Categorical\nCandidacy\nQuality") +
  ggtitle("List-PR Candidacy Quality, Continuous \n and Categorical Measures Compared \n Level-Hopping Attempts (1987-2021)") +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "left")

#+ annotate("text", x = -30, y = 0.125, label = "Joachim\nHerrmann", size = 3)

figure_2 <- plot_smd | plot_list
figure_2

# produces figure 2 in paper; the plot is passed explicitly so the saved file
# does not depend on whichever plot happened to be displayed last
ggsave("cand_quality_measures_30082023.png", plot = figure_2, width = 11, height = 4.5)

# Carry the two joint candidacy-quality measures over to the main dataset,
# matched on legislator id and federal election.
candidacy_quality <- dplyr::select(all_candidates, id, which_federal_election,
                                   joint_prob, joint_safety)

landtag_subset <- left_join(landtag_subset, candidacy_quality,
                            by = c("id", "which_federal_election"))

#### Descriptive Statistics: Level Hopping, figure 4 ####

# Level-hopping attempts and successes per party and federal election, for
# the six main party groupings (CDU and CSU pooled, Greens given a short label).
desc <- landtag_subset %>%
  dplyr::filter(ran == 1) %>%
  dplyr::mutate(party_elected_with = dplyr::case_when(
    party_elected_with %in% c("CDU", "CSU") ~ "CDU/CSU",
    party_elected_with %in% c("Grüne / B90 / AL / Neues Forum") ~ "Grüne",
    .default = party_elected_with
  )) %>%
  dplyr::filter(party_elected_with %in% c("AfD", "CDU/CSU", "FDP", "SPD",
                                          "LINKE/PDS", "Grüne")) %>%
  dplyr::group_by(party_elected_with, which_federal_election) %>%
  dplyr::summarise(candidacies = sum(ran),
                   successful_candidacies = sum(moved_up)) %>%
  dplyr::rename(party = party_elected_with)

# Number of candidacies per party in each federal election's candidacy file,
# stacked into one table (columns Var1, Freq, which_federal_election).
party_counts <- Map(
  function(cands, term) {
    dplyr::mutate(as.data.frame(table(cands$party)), which_federal_election = term)
  },
  list(candidacies87, candidacies90, candidacies94, candidacies98, candidacies02,
       candidacies05, candidacies09, candidacies13, candidacies17, candidacies21),
  as.numeric(11:20)
)
desc1 <- do.call(rbind.data.frame, c(party_counts, list(stringsAsFactors = FALSE)))

# Pool CDU and CSU under one label.
desc1$party <- as.character(desc1$Var1)
desc1$party[desc1$party %in% c("CDU", "CSU")] <- "CDU/CSU"

# Total candidacies per party and election for the six main groupings; the
# Greens get their short label only after the other parties are filtered out.
main_parties_long <- c("AfD", "CDU/CSU", "FDP", "SPD",
                       "LINKE/PDS", "Grüne / B90 / AL / Neues Forum")
desc1 <- desc1 %>%
  dplyr::group_by(party, which_federal_election) %>%
  dplyr::summarise(tot_candidacies = sum(Freq)) %>%
  dplyr::filter(party %in% main_parties_long) %>%
  dplyr::mutate(party = ifelse(party %in% c("Grüne / B90 / AL / Neues Forum"),
                               "Grüne", party))

# Combine level-hopping counts with total candidacies, keeping only
# party-elections that have at least one level-hopping attempt.
desc2 <- full_join(desc, desc1, by = c("party", "which_federal_election"))
desc2 <- dplyr::filter(desc2, !is.na(candidacies))

desc2$share_candidacies <- with(desc2, candidacies / tot_candidacies)

# Calendar year of each federal election (11 = 1987, ..., 20 = 2021).
election_years <- c(1987, 1990, 1994, 1998, 2002, 2005, 2009, 2013, 2017, 2021)
desc2$election <- election_years[match(desc2$which_federal_election, 11:20)]

# Overall share of candidacies that are level-hopping attempts.
sum(desc2$candidacies) / sum(desc2$tot_candidacies)

# Stack, for each federal election 11-20, the state legislators flagged as
# sitting at that election (in_landtag_for_<k>_fed_election == 1).
sitting_by_election <- lapply(11:20, function(term) {
  flag <- paste0("in_landtag_for_", term, "_fed_election")
  landtag %>%
    filter(.data[[flag]] == 1) %>%
    mutate(which_federal_election = as.numeric(term))
})
desc3 <- do.call(rbind.data.frame, sitting_by_election)

# Number of sitting state legislators per election and party, for the six
# main groupings (CDU and CSU pooled, Greens given a short label).
desc3 <- desc3 %>%
  dplyr::mutate(party_elected_with = dplyr::case_when(
    party_elected_with %in% c("CDU", "CSU") ~ "CDU/CSU",
    party_elected_with %in% c("Grüne / B90 / AL / Neues Forum") ~ "Grüne",
    .default = party_elected_with
  )) %>%
  group_by(which_federal_election, party_elected_with) %>%
  summarise(no_stateleg = n()) %>%
  dplyr::rename(party = party_elected_with) %>%
  dplyr::filter(party %in% c("AfD", "CDU/CSU", "FDP", "SPD",
                             "LINKE/PDS", "Grüne"))

# freshman = 1 for a member of session k (k = 11..20) whose pageid does not
# appear among the members of session k - 1; 0 for everyone else.
bundestag$freshman <- 0
for (term in 11:20) {
  previous_members <- unique(bundestag$pageid[bundestag$session == term - 1])
  bundestag$freshman[bundestag$session == term &
                       bundestag$pageid %notin% previous_members] <- 1
}

# Newly elected federal MPs per session and party grouping. Rows with
# constituency2 == "Volkskammer" are excluded, party labels are harmonised,
# and only the six main groupings are kept.
desc4 <- bundestag %>%
  dplyr::filter(freshman == 1, constituency2 != "Volkskammer") %>%
  dplyr::group_by(session, party) %>%
  dplyr::summarise(num_newly_elected = sum(freshman)) %>%
  dplyr::mutate(party_elected_with = dplyr::case_when(
    party %in% c("AL", "BÜNDNIS 90/DIE GRÜNEN", "Greens") ~ "Grüne",
    party %in% c("CDU", "CSU") ~ "CDU/CSU",
    party %in% c("PDS", "DIE LINKE", "Left") ~ "LINKE/PDS",
    .default = as.character(party)
  )) %>%
  dplyr::filter(party_elected_with %in% c("AfD", "CDU/CSU", "FDP", "SPD",
                                          "LINKE/PDS", "Grüne")) %>%
  dplyr::group_by(session, party_elected_with) %>%
  dplyr::summarise(num_newly_elected = sum(num_newly_elected)) %>%
  dplyr::rename(which_federal_election = session,
                party = party_elected_with)

# Combine level-hopping counts (desc2) with newly elected federal MPs (desc4)
# and sitting state legislators (desc3), per party and federal election.
desc5 <- full_join(desc2, desc4, by = c("party", "which_federal_election"))

desc5 <- full_join(desc5, desc3, by = c("party", "which_federal_election"))

# The election year was only set on desc2, so party-elections that enter
# through desc4 or desc3 alone would carry NA and be plotted under an "NA"
# category. Derive the year from the join key for every row instead
# (11 = 1987, ..., 20 = 2021).
election_years <- c(1987, 1990, 1994, 1998, 2002, 2005, 2009, 2013, 2017, 2021)
desc5$election <- election_years[match(desc5$which_federal_election, 11:20)]

# Level-hoppers as a share of newly elected federal MPs and of sitting state
# legislators.
desc5$share_new <- desc5$successful_candidacies / desc5$num_newly_elected
desc5$share_leg <- desc5$candidacies / desc5$no_stateleg

# Build one panel of figure 4: horizontal bars of a desc5 column by election
# year, faceted by party, with the legend hidden (facet strips name the party).
#   x_var       name of the desc5 column shown on the x axis
#   x_label     x-axis label
#   panel_title panel title
#   x_scale     optional x scale to add (NULL adds nothing)
# NOTE(review): the unnamed fill values are matched to the party levels in
# order; if party sorts alphabetically, LINKE/PDS receives "#E3000F" and SPD
# "#A6006B" -- confirm this is the intended colour assignment.
level_hopping_panel <- function(x_var, x_label, panel_title, x_scale = NULL) {
  ggplot(data = desc5, mapping = aes(y = as.factor(election),
                                     x = .data[[x_var]],
                                     group = party,
                                     fill = party)) +
    geom_bar(stat = "identity", position = "dodge", col = "black", lwd = 0.1) +
    facet_wrap(~party, nrow = 1) +
    scale_fill_manual(values = c("#0489DB", "#000000", "#FFEF00",
                                 "#1AA037", "#E3000F", "#A6006B"),
                      labels = c("AfD", "CDU/CSU", "FDP",
                                 "Grüne", "Die Linke/PDS", "SPD"), name = "Party") +
    theme_minimal() + xlab(x_label) +
    ylab("") + ggtitle(panel_title) +
    theme(plot.title = element_text(hjust = 0)) +
    # "none" is the documented value for hiding the legend
    theme(legend.position = "none") +
    x_scale
}

plot1 <- level_hopping_panel("candidacies",
                             "# State MPs candidacies",
                             "Number of Level-Hopping Attempts")

plot2 <- level_hopping_panel("successful_candidacies",
                             "# Sitting State legislators elected directly to the Bundestag",
                             "Number of Successful Level-Hopping Attempts")

plot3 <- level_hopping_panel("no_stateleg",
                             "# State MPs on election day",
                             "Number of Sitting State MPs")

plot4 <- level_hopping_panel("share_candidacies",
                             "# Level-Hoppers / # All Candidacies",
                             "Share of Candidates who are Level-Hoppers",
                             scale_x_continuous(labels = scales::percent))

plot5 <- level_hopping_panel("share_new",
                             "# Level-Hoppers / # non-incumbent Federal MPs",
                             "Share of New Federal MPs who are Level-Hoppers",
                             scale_x_continuous(labels = scales::percent,
                                                breaks = c(0, 0.3, 0.6, 0.9)))

plot6 <- level_hopping_panel("share_leg",
                             "# Level-Hoppers / # State Legislators",
                             "Share of Sitting State MPs who are Level-Hoppers",
                             scale_x_continuous(labels = scales::percent,
                                                limits = c(0, 0.22)))

# Stack the six panels vertically.
figure_4 <- plot1 / plot2 / plot3 / plot4 / plot5 / plot6
figure_4

# produces figure 4; the plot is passed explicitly so the saved file does not
# depend on whichever plot happened to be displayed last
ggsave("descriptive_plots_germany_party.jpg", plot = figure_4, height = 13, width = 10)

#### Descriptive Statistics: Level Hopping, figure 5 and appendix table ####

# Subset to legislatures for which candidacy quality is available: drop the
# listed states for the 12th federal election and Berlin for the 13th.
landtag_subset <- landtag_subset %>%
  dplyr::filter(
    !(state %in% c("BE", "SN", "ST", "TH", "MV", "BB", "BY") &
        which_federal_election == 12),
    !(state %in% c("BE") & which_federal_election == 13)
  )

# Manual coding for Patrick Breyer, Pirates Party (party didn't contest his
# SMD at t-1).
landtag_subset$joint_safety[landtag_subset$entry_id == "261574_18"] <- 0

# Candidacy quality dummies: 1 when joint_safety is 1 (secure) or 0
# (insecure) respectively; rows with missing joint_safety get 0 on both.
landtag_subset$ran_secure <- as.numeric(landtag_subset$joint_safety %in% 1)
landtag_subset$ran_insecure <- as.numeric(landtag_subset$joint_safety %in% 0)


# create descriptive statistic dataframes (before further subsetting, to be plotted later)

# Counts by electoral tier. The first table covers all states; the second
# excludes HH, HB and SL and keeps only the list tier ("Mixed only").
plot_data_tier <- rbind.data.frame(
  landtag_subset %>%
    dplyr::group_by(tier_dummy) %>%
    summarise(count = n(),
              count_ran = sum(ran, na.rm = TRUE),
              count_ran_s = sum(ran_secure, na.rm = TRUE),
              count_ran_i = sum(ran_insecure, na.rm = TRUE),
              count_moved_up = sum(moved_up, na.rm = TRUE),
              avg_security = mean(joint_prob, na.rm = TRUE)) %>%
    # Label by value rather than by row position, so the labels cannot be
    # swapped by the sort order of the tier codes. The non-"list" tier is
    # labelled "SMD", as in the original two-label vector.
    dplyr::mutate(tier_dummy = ifelse(as.character(tier_dummy) == "list",
                                      "List PR (all states)", "SMD")),

  landtag_subset %>%
    dplyr::filter(state %notin% c("HH", "HB", "SL")) %>%
    dplyr::group_by(tier_dummy) %>%
    summarise(count = n(),
              count_ran = sum(ran, na.rm = TRUE),
              count_ran_s = sum(ran_secure, na.rm = TRUE),
              count_ran_i = sum(ran_insecure, na.rm = TRUE),
              count_moved_up = sum(moved_up, na.rm = TRUE),
              avg_security = mean(joint_prob, na.rm = TRUE)) %>%
    dplyr::filter(tier_dummy == "list") %>%
    dplyr::mutate(tier_dummy = "List PR (Mixed only)")) %>%
  mutate(tier_dummy = fct_relevel(tier_dummy, "SMD", "List PR (Mixed only)", "List PR (all states)")) %>%
  dplyr::rename(groups = tier_dummy) %>% dplyr::mutate(variable = "Tier of Election")


# Display names for the state codes; codes not listed here are kept as they are.
state_names <- c(
  BB = "Brandenburg", BE = "Berlin", BY = "Bavaria", BW = "Baden Württemberg",
  HB = "Bremen", HH = "Hamburg", HE = "Hessen", MV = "Mecklenburg WP",
  NI = "Lower Saxony", NW = "NR Westphalia", RP = "Rhineland Palatinate",
  SH = "Schleswig-Holstein", SL = "Saarland", SN = "Saxony",
  ST = "Saxony-Anhalt", TH = "Thuringia"
)

# Counts and average continuous candidacy quality per state.
plot_data_state <- landtag_subset %>%
  dplyr::mutate(state = dplyr::coalesce(unname(state_names[state]), state)) %>%
  dplyr::group_by(state) %>%
  summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  )

# States ordered by the share of legislators who ran, ascending.
order <- plot_data_state %>%
  arrange(count_ran / count) %>%
  dplyr::pull(state)

plot_data_state <- plot_data_state %>%
  mutate(state = fct_relevel(state, order)) %>%
  dplyr::rename(groups = state) %>%
  dplyr::mutate(variable = "State")



# Counts and average continuous candidacy quality per party; the
# "Other Rad. Right (DVU/NPD/REP)" category is folded into "Minor Parties".
plot_data_party <- landtag_subset %>%
  dplyr::mutate(party_elected_short = replace(
    party_elected_short,
    party_elected_short %in% "Other Rad. Right (DVU/NPD/REP)",
    "Minor Parties"
  )) %>%
  dplyr::group_by(party_elected_short) %>%
  summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  )

# Parties ordered by the share of legislators who ran, ascending.
order <- plot_data_party %>%
  arrange(count_ran / count) %>%
  dplyr::pull(party_elected_short)

plot_data_party <- plot_data_party %>%
  mutate(party_elected_short = fct_relevel(party_elected_short, order)) %>%
  dplyr::rename(groups = party_elected_short) %>%
  dplyr::mutate(variable = "Party")


# Capitalised display labels; any other gender value is kept unchanged.
gender_labels <- c(female = "Female", male = "Male")

# Counts and average continuous candidacy quality by gender.
plot_data_gender <- landtag_subset %>%
  dplyr::mutate(gender = dplyr::coalesce(unname(gender_labels[gender]), gender)) %>%
  dplyr::group_by(gender) %>%
  summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  )

# Groups ordered by the share of legislators who ran, ascending.
order <- plot_data_gender %>%
  arrange(count_ran / count) %>%
  dplyr::pull(gender)

plot_data_gender <- plot_data_gender %>%
  mutate(gender = fct_relevel(gender, order)) %>%
  dplyr::rename(groups = gender) %>%
  dplyr::mutate(variable = "Gender")


# Display labels for the government-status dummy; other values are kept as
# their character representation.
govt_labels <- c("0" = "In Opposition", "1" = "In Government")

# Counts and average continuous candidacy quality by government status of the
# legislator's party in the Landtag.
plot_data_govt <- landtag_subset %>%
  dplyr::mutate(pty_in_govt = as.character(pty_in_govt)) %>%
  dplyr::mutate(pty_in_govt = dplyr::coalesce(unname(govt_labels[pty_in_govt]),
                                              pty_in_govt)) %>%
  dplyr::group_by(pty_in_govt) %>%
  summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  )

# Groups ordered by the share of legislators who ran, ascending.
order <- plot_data_govt %>%
  arrange(count_ran / count) %>%
  dplyr::pull(pty_in_govt)

plot_data_govt <- plot_data_govt %>%
  mutate(pty_in_govt = fct_relevel(pty_in_govt, order)) %>%
  dplyr::rename(groups = pty_in_govt) %>%
  dplyr::mutate(variable = "Landtag Government Status")


# Fixed display order of the seniority bands.
order <- c("Five-Term+", "Three/Four-Term", "Two-Term", "One-Term")

# Counts and average continuous candidacy quality by seniority band; values
# matching none of the bands fall into an NA group, which is kept.
plot_data_seniority <- landtag_subset %>%
  dplyr::mutate(seniority_cat = case_when(
    seniority == 1 ~ "One-Term",
    seniority == 2 ~ "Two-Term",
    seniority %in% c(3, 4) ~ "Three/Four-Term",
    seniority >= 5 ~ "Five-Term+",
    .default = NA
  )) %>%
  dplyr::group_by(seniority_cat) %>%
  summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  ) %>%
  mutate(seniority_cat = fct_relevel(seniority_cat, order)) %>%
  dplyr::rename(groups = seniority_cat) %>%
  dplyr::mutate(variable = "Seniority")


# Level-hopping summary by age band. case_when() evaluates its branches in
# order, so each upper bound only needs to exclude the bands above it.
# Rows with missing age get no band and are dropped.
plot_data_age <- landtag_subset %>%
  dplyr::mutate(
    age_cat = case_when(
      age < 40 ~ "< 40 years old",
      age < 50 ~ "40-50 years old",
      age < 60 ~ "50-60 years old",
      age >= 60 ~ "≥ 60 years old",
      .default = NA
    )
  ) %>%
  dplyr::filter(!is.na(age_cat)) %>%
  dplyr::group_by(age_cat) %>%
  dplyr::summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  )

# Fixed display order, oldest band first.
order <- c("≥ 60 years old", "50-60 years old", "40-50 years old", "< 40 years old")

plot_data_age <- plot_data_age %>%
  dplyr::rename(groups = age_cat) %>%
  dplyr::mutate(
    groups = fct_relevel(groups, order),
    variable = "Age"
  )


# Single "Overall" row with the same summary columns as the per-group tables,
# so it can be appended to them in the descriptive table.
plot_data_all <- landtag_subset %>%
  dplyr::mutate(groups = "Overall") %>%
  dplyr::group_by(groups) %>%
  dplyr::summarise(
    count = n(),
    count_ran = sum(ran, na.rm = TRUE),
    count_ran_s = sum(ran_secure, na.rm = TRUE),
    count_ran_i = sum(ran_insecure, na.rm = TRUE),
    count_moved_up = sum(moved_up, na.rm = TRUE),
    avg_security = mean(joint_prob, na.rm = TRUE)
  ) %>%
  dplyr::mutate(variable = "Overall")

#### Add legislative positions ####

# filter to legislative terms for which we have legislative positions
# Federal-election terms, per state, that are kept in the sample.
legpos_terms_by_state <- list(
  BB = 14:20,
  BW = 13:20,
  BY = c(11, 12, 14, 15, 16, 17, 19, 20),
  BE = 13:20,
  HH = 13:20,
  HB = 13:20,
  HE = 11:20,
  MV = 13:20,
  NI = 13:20,
  NW = 13:20,
  RP = 13:20,
  SL = c(14:16, 18:20),
  SN = c(14:16, 18:20),
  ST = 13:20,
  SH = 13:20,
  TH = c(13:16, 18:20)
)

# Flatten the table into "<state> <term>" keys, e.g. "BB 14".
legpos_state_term_keys <- unlist(
  Map(paste, names(legpos_terms_by_state), legpos_terms_by_state),
  use.names = FALSE
)

# Keep only rows whose (state, term) pair is listed above; rows with a
# missing or unlisted state never match a key and are dropped.
landtag_subset <- landtag_subset %>%
  dplyr::filter(
    paste(state, which_federal_election) %in% legpos_state_term_keys
  )

# get in data on legislative positions

# Read the legislative-position records once; each indicator table below is
# derived from this single copy instead of re-reading the file per office.
legpos_raw <- readRDS("legpos1812.rds")

# Build a lookup with one row per (entry_id, which_federal_election) for the
# records whose `office` code is in `office_codes`, carrying an indicator
# column named `flag_name` that is set to 1.
#   legpos       data frame with columns office, entry_id, which_federal_election
#   office_codes character vector of office codes to keep
#   flag_name    name of the indicator column to create
# Returns a data frame with columns entry_id, which_federal_election, <flag_name>.
flag_legpos_office <- function(legpos, office_codes, flag_name) {
  flagged <- legpos %>%
    dplyr::filter(office %in% .env$office_codes, !is.na(entry_id)) %>%
    # De-duplicate directly on the key columns so each legislator-term
    # appears once and the later joins cannot multiply rows.
    dplyr::distinct(entry_id, which_federal_election)
  flagged[[flag_name]] <- 1
  flagged
}

deputies <- flag_legpos_office(legpos_raw, c("D", "G"), "deputy_or_cw")
chairs <- flag_legpos_office(legpos_raw, c("V"), "chair")
leaders <- flag_legpos_office(legpos_raw, c("F"), "party_leader")
# NOTE(review): office code "V" is used for `chairs` above and again here, so
# every chair row is also flagged pres_or_vp. Confirm against the codebook
# that this overlap is intended; the codes are kept exactly as in the original.
presidents <- flag_legpos_office(legpos_raw, c("V", "S"), "pres_or_vp")

# Attach the four indicators to the legislator-term panel. Join order is
# deputies, chairs, presidents, leaders, which fixes the column order.
legpos_join_keys <- c("entry_id", "which_federal_election")

landtag_subset <- landtag_subset %>%
  dplyr::left_join(deputies, by = legpos_join_keys) %>%
  dplyr::left_join(chairs, by = legpos_join_keys) %>%
  dplyr::left_join(presidents, by = legpos_join_keys) %>%
  dplyr::left_join(leaders, by = legpos_join_keys)

# Legislator-terms without a matching record did not hold the office: 0.
landtag_subset$deputy_or_cw[is.na(landtag_subset$deputy_or_cw)] <- 0
landtag_subset$chair[is.na(landtag_subset$chair)] <- 0
landtag_subset$pres_or_vp[is.na(landtag_subset$pres_or_vp)] <- 0
landtag_subset$party_leader[is.na(landtag_subset$party_leader)] <- 0

# get in data on executive positions

# One row per (id, which_federal_election) found in the executive-position
# file, flagged with exec_position = 1.
expos2 <- readRDS("expos1812.rds") %>%
  dplyr::filter(!is.na(id)) %>%
  dplyr::distinct(id, which_federal_election) %>%
  dplyr::mutate(exec_position = 1)

landtag_subset <- landtag_subset %>%
  dplyr::left_join(expos2, by = c("id", "which_federal_election"))

# Legislator-terms without a match held no executive position.
landtag_subset$exec_position[is.na(landtag_subset$exec_position)] <- 0

# Combined 0/1 dummies: party_position = party leader or deputy_or_cw;
# legislative_position = pres_or_vp or chair. `%in% TRUE` maps a missing
# comparison to 0, like the original "default 0, overwrite with 1" logic.
landtag_subset$party_position <- as.numeric(
  (landtag_subset$party_leader == 1 | landtag_subset$deputy_or_cw == 1) %in% TRUE
)
landtag_subset$legislative_position <- as.numeric(
  (landtag_subset$pres_or_vp == 1 | landtag_subset$chair == 1) %in% TRUE
)

# Summarise level-hopping by state-level office held (one of the inputs to figure 5)

# Summarise one office-holder subset into a single labelled row with the same
# columns as the other plot_data_* tables. An empty subset yields zero rows.
#   holders       rows of landtag_subset belonging to the group
#   office_label  value written to the `groups` column
summarise_office_holders <- function(holders, office_label) {
  holders %>%
    dplyr::mutate(groups = .env$office_label) %>%
    dplyr::group_by(groups) %>%
    dplyr::summarise(
      count = n(),
      count_ran = sum(ran, na.rm = TRUE),
      count_ran_s = sum(ran_secure, na.rm = TRUE),
      count_ran_i = sum(ran_insecure, na.rm = TRUE),
      count_moved_up = sum(moved_up, na.rm = TRUE),
      avg_security = mean(joint_prob, na.rm = TRUE)
    )
}

# The three office groups are not mutually exclusive: a legislator holding
# several kinds of position is counted in each. "None" covers legislators
# with none of the three.
plot_data_positions <- rbind.data.frame(
  summarise_office_holders(
    dplyr::filter(landtag_subset, exec_position == 1),
    "Executive Position"
  ),
  summarise_office_holders(
    dplyr::filter(landtag_subset, party_position == 1),
    "State Party Position"
  ),
  summarise_office_holders(
    dplyr::filter(landtag_subset, legislative_position == 1),
    "Legislative Position"
  ),
  summarise_office_holders(
    dplyr::filter(
      landtag_subset,
      legislative_position == 0 & party_position == 0 & exec_position == 0
    ),
    "None"
  )
)

# Group labels sorted by ascending share of legislators who ran.
order <- plot_data_positions %>%
  dplyr::arrange(count_ran / count) %>%
  dplyr::pull(groups)

plot_data_positions <- plot_data_positions %>%
  dplyr::mutate(
    groups = fct_relevel(groups, order),
    variable = "State-Level Office Held"
  )


# Stack all per-variable summaries; `variable` fixes the facet order.
plot_data <- rbind.data.frame(plot_data_tier, plot_data_state, plot_data_party,
                              plot_data_gender, plot_data_govt, plot_data_positions,
                              plot_data_seniority, plot_data_age) %>%
  mutate(variable = fct_relevel(variable, c("Tier of Election", "State", "Party", "Gender",
                                            "Landtag Government Status", "State-Level Office Held",
                                            "Seniority", "Age")))

# Two overlaid bars per group: the full bar ("a") is the share of sitting
# legislators who ran, and the bar drawn on top ("b") is the share who ran
# with a secure candidacy, so the visible remainder of "a" is the insecure
# share. The constant fill keys "a"/"b" are mapped to colours and legend
# labels by scale_fill_manual().
# The original plot-level aes(fill = c("Insecure", "Secure")) was overridden
# by both layers and the early xlab("") by the later xlab(); both are dropped.
fig5_plot <- ggplot(data = plot_data, mapping = aes(y = groups)) +
  geom_col(mapping = aes(x = count_ran / count, fill = "a")) +
  geom_col(mapping = aes(x = count_ran_s / count, fill = "b")) +
  ylab("") +
  ggforce::facet_col(~variable, scales = "free_y", space = "free") +
  xlab("Level-Hopping Attempts as a Share of Sitting State Legislators") +
  theme_minimal() +
  scale_fill_manual(values = c("#882255", "#88CCEE"), labels = c("Insecure", "Secure"),
                    name = "Candidacy Quality (Categorical)") +
  ggtitle("Aggregate Level-Hopping Attempt Rates") +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))

# Display the plot when the script is run interactively.
fig5_plot

# produces figure 5 in paper
# The plot is passed explicitly so the saved file does not depend on which
# plot happens to be the most recently created one.
ggsave("level_hopping_descriptives.png", plot = fig5_plot, height = 11.5, width = 8)

# Descriptive table: the same summaries as the plot plus the "Overall" row,
# with counts and shares formatted as display strings.
table_data <- rbind.data.frame(
  plot_data_tier, plot_data_state, plot_data_party,
  plot_data_gender, plot_data_govt, plot_data_positions,
  plot_data_seniority, plot_data_age, plot_data_all
) %>%
  dplyr::mutate(
    variable = fct_relevel(
      variable,
      c("Tier of Election", "State", "Party", "Gender",
        "Landtag Government Status", "State-Level Office Held",
        "Seniority", "Age", "Overall")
    ),
    # candidacies: count of runs with the share of all legislators in brackets
    candidacies = paste0(round(count_ran, 0), " (", round(100 * (count_ran / count), 1), "%)"),
    # the remaining shares are relative to those who ran
    insecure = paste0(round(100 * (count_ran_i / count_ran), 1), "%"),
    secure = paste0(round(100 * (count_ran_s / count_ran), 1), "%"),
    moved_up = paste0(round(100 * (count_moved_up / count_ran), 1), "%")
  ) %>%
  dplyr::select(groups, count, candidacies, insecure, secure, moved_up, avg_security)

# produces descriptive table (second table in Appendix B)
print(xtable(table_data), include.rownames = FALSE)

#### Regression Models ####

# Main logit

# Make "SMD" the reference level of the electoral-tier factor.
landtag_subset$tier_dummy <- relevel(as.factor(landtag_subset$tier_dummy), ref = "SMD")
# Rescale time since the last election by 365
# (presumably days -> years; confirm the unit of time_since_last_election).
landtag_subset$time_since_last_election_years <- landtag_subset$time_since_last_election/365

# Model 1: logit of `ran` on tier, office-holding dummies, government status,
# quadratic age and seniority, pv, time since last election and gender, with
# additive state, party and federal-election fixed effects.
# NOTE: margins/ggeffects calls further down work from these fitted objects,
# so keep the glm() calls themselves unchanged.
fit1 <- glm(data = landtag_subset, ran  ~ 
              tier_dummy +
              exec_position +
              legislative_position + 
              party_position + 
              pty_in_govt + 
              age + I(age^2) +
              seniority + I(seniority^2) + 
              pv + time_since_last_election_years +
              gender +
              state + party_elected_short +
              as.factor(which_federal_election)
            , family = binomial(link = "logit"))
summary(fit1)

# HC1 heteroskedasticity-robust covariance matrix and the matching
# coefficient tests (the robust matrix is reused for the marginal effects).
robust_se_fit1 <- vcovHC(fit1, type = 'HC1')
fit1_robust_se <- coeftest(fit1, robust_se_fit1)

# Model 2: as Model 1, but state and party enter fully interacted
# (state*party_elected_short) instead of additively.
fit2 <- glm(data = landtag_subset, ran  ~ 
              tier_dummy +
              exec_position +
              legislative_position + 
              party_position + 
              pty_in_govt + 
              age + I(age^2) +
              seniority + I(seniority^2) + 
              pv + time_since_last_election_years +
              gender +
              state*party_elected_short + as.factor(which_federal_election)
            , family = binomial(link = "logit"))
summary(fit2)

robust_se_fit2 <- vcovHC(fit2, type = 'HC1')
fit2_robust_se <- coeftest(fit2, robust_se_fit2)

# Model 3: as Model 1, but party and federal election enter fully interacted
# (party_elected_short*as.factor(which_federal_election)).
fit3 <- glm(data = landtag_subset, ran  ~ 
              tier_dummy +
              exec_position +
              legislative_position + 
              party_position + 
              pty_in_govt + 
              age + I(age^2) +
              seniority + I(seniority^2) + 
              pv + time_since_last_election_years +
              gender +
              state + party_elected_short*as.factor(which_federal_election)
            , family = binomial(link = "logit"))
summary(fit3)

robust_se_fit3 <- vcovHC(fit3, type = 'HC1')
fit3_robust_se <- coeftest(fit3, robust_se_fit3)

# Models 4-6 repeat Models 1-3 on the sample without the states HH, HB and SL
# (`%notin%` is the negated `%in%` defined at the top of the file).

# Model 4: specification of Model 1 (additive state, party, election effects).
fit4 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
              tier_dummy +
              exec_position +
              legislative_position + 
              party_position + 
              pty_in_govt + 
              age + I(age^2) +
              seniority + I(seniority^2) + 
              pv + time_since_last_election_years +
              gender +
              state + party_elected_short +
              as.factor(which_federal_election)
            , family = binomial(link = "logit"))
summary(fit4)

# HC1 robust covariance matrix and coefficient tests, as for Models 1-3.
robust_se_fit4 <- vcovHC(fit4, type = 'HC1')
fit4_robust_se <- coeftest(fit4, robust_se_fit4)

# Model 5: specification of Model 2 (state x party interaction).
fit5 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
              tier_dummy +
              exec_position +
              legislative_position + 
              party_position + 
              pty_in_govt + 
              age + I(age^2) +
              seniority + I(seniority^2) + 
              pv + time_since_last_election_years +
              gender +
              state*party_elected_short + as.factor(which_federal_election)
            , family = binomial(link = "logit"))
summary(fit5)

robust_se_fit5 <- vcovHC(fit5, type = 'HC1')
fit5_robust_se <- coeftest(fit5, robust_se_fit5)

# Model 6: specification of Model 3 (party x federal-election interaction).
fit6 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
              tier_dummy +
              exec_position +
              legislative_position + 
              party_position + 
              pty_in_govt + 
              age + I(age^2) +
              seniority + I(seniority^2) + 
              pv + time_since_last_election_years +
              gender +
              state + party_elected_short*as.factor(which_federal_election)
            , family = binomial(link = "logit"))
summary(fit6)

robust_se_fit6 <- vcovHC(fit6, type = 'HC1')
fit6_robust_se <- coeftest(fit6, robust_se_fit6)

# Reproduces logistic regression tables (table 4 in paper)
# One table for the full-sample models, one for the restricted-sample models;
# both use the robust coefficient tests rather than the raw glm fits.
stargazer(fit1_robust_se, fit2_robust_se, fit3_robust_se,
          digits = 2, single.row = TRUE)
stargazer(fit4_robust_se, fit5_robust_se, fit6_robust_se,
          digits = 2, single.row = TRUE)

# Nagelkerke pseudo-R2 for Models 1-3 and Models 4-6, rounded to 3 digits.
round(c(DescTools::PseudoR2(fit1, which = "Nagelkerke"),
        DescTools::PseudoR2(fit2, which = "Nagelkerke"),
        DescTools::PseudoR2(fit3, which = "Nagelkerke")), 3)
round(c(DescTools::PseudoR2(fit4, which = "Nagelkerke"),
        DescTools::PseudoR2(fit5, which = "Nagelkerke"),
        DescTools::PseudoR2(fit6, which = "Nagelkerke")), 3)

# Number of observations in the full-sample and restricted-sample models.
nobs(fit1)
nobs(fit4)

#### AME plot (figure 6) ####

# Average marginal effects for the same eight covariates in each of the six
# logit models, each computed with that model's HC1 robust covariance matrix.
# The summaries are combined below using their factor, AME, lower and upper
# columns.
me1 <- summary(margins::margins(fit1, variables = c("tier_dummy", "exec_position",
                                                    "legislative_position", "party_position",
                                                    "pv", "gender", "pty_in_govt", "time_since_last_election_years"),
                                
                                vcov = robust_se_fit1))

me2 <- summary(margins::margins(fit2, variables = c("tier_dummy", "exec_position",
                                                    "legislative_position", "party_position",
                                                    "pv", "gender", "pty_in_govt","time_since_last_election_years"),
                                vcov = robust_se_fit2))

me3 <- summary(margins::margins(fit3, variables = c("tier_dummy", "exec_position",
                                                    "legislative_position", "party_position",
                                                    "pv", "gender", "pty_in_govt","time_since_last_election_years"),
                                
                                vcov = robust_se_fit3))

# Models 4-6 are the fits on the sample without HH, HB and SL.
me4 <- summary(margins::margins(fit4, variables = c("tier_dummy", "exec_position",
                                                    "legislative_position", "party_position",
                                                    "pv", "gender", "pty_in_govt","time_since_last_election_years"),
                                
                                vcov = robust_se_fit4))

me5 <- summary(margins::margins(fit5, variables = c("tier_dummy", "exec_position",
                                                    "legislative_position", "party_position",
                                                    "pv", "gender", "pty_in_govt","time_since_last_election_years"),
                                
                                vcov = robust_se_fit5))

me6 <- summary(margins::margins(fit6, variables = c("tier_dummy", "exec_position",
                                                    "legislative_position", "party_position",
                                                    "pv", "gender", "pty_in_govt","time_since_last_election_years"),
                                
                                vcov = robust_se_fit6))

# Stack the six AME summaries, tag each with its model, and replace the raw
# term names with display labels ("\n" forces a line break on the axis).
# Terms without a label keep their raw name.
me_combined <- rbind.data.frame(
  as_tibble(me1) %>% mutate(model = "Model 1"),
  as_tibble(me2) %>% mutate(model = "Model 2"),
  as_tibble(me3) %>% mutate(model = "Model 3"),
  as_tibble(me4) %>% mutate(model = "Model 4"),
  as_tibble(me5) %>% mutate(model = "Model 5"),
  as_tibble(me6) %>% mutate(model = "Model 6")
) %>%
  dplyr::mutate(
    factor = case_match(
      factor,
      "tier_dummylist" ~ "List PR",
      "exec_position" ~ "Executive \n Position",
      "legislative_position" ~ "Legislative \n Position",
      "party_position" ~ "Party Leadership \n Position",
      "pty_in_govt" ~ "Party In \n State Govt.",
      "pv" ~ "Preferential \n Voting",
      "time_since_last_election_years" ~ "Time since last \n election (1 year)",
      "gendermale" ~ "Gender (Male)",
      .default = factor
    ),
    # facet order
    model = fct_relevel(
      model,
      c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6")
    ),
    # order of the terms along the axis
    factor = fct_relevel(
      factor,
      c("Gender (Male)", "Time since last \n election (1 year)",
        "Preferential \n Voting", "Party In \n State Govt.",
        "Party Leadership \n Position", "Legislative \n Position",
        "Executive \n Position", "List PR")
    )
  )

#### Age and Seniority (Predicted and AMEs) plots for appendix ####

# Predicted values of each logit model over an age grid from 19 to 85 in
# steps of 0.1, with party_elected_short fixed to "CDU/CSU", state to "BE"
# and which_federal_election to 20, requesting HC1 robust standard errors.
# Each result is tagged with variable = "Age" and its model label.
# NOTE(review): c() coerces the `condition` values to character, so 20 is
# passed as "20" - confirm ggpredict handles this as intended.
age_pred_fit1 <- as_tibble(ggeffects::ggpredict(fit1, terms = list(age = seq(19,85, 0.1)),
                                                condition = c(
                                                  party_elected_short = "CDU/CSU",
                                                  state = "BE", 
                                                  which_federal_election = 20),
                                                vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Age", model = "Model 1")

age_pred_fit2 <-  as_tibble(ggeffects::ggpredict(fit2, terms = list(age = seq(19,85, 0.1)),
                                                 condition = c(
                                                   party_elected_short = "CDU/CSU",
                                                   state = "BE", 
                                                   which_federal_election = 20),
                                                 vcov.type = "HC1"))%>%
  dplyr::mutate(variable = "Age", model = "Model 2")

age_pred_fit3 <-  as_tibble(ggeffects::ggpredict(fit3, terms = list(age = seq(19,85, 0.1)),
                                                 condition = c(
                                                   party_elected_short = "CDU/CSU",
                                                   state = "BE", 
                                                   which_federal_election = 20),
                                                 vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Age", model = "Model 3")

# Models 4-6 are the fits on the sample without HH, HB and SL.
age_pred_fit4 <-  as_tibble(ggeffects::ggpredict(fit4, terms = list(age = seq(19,85, 0.1)),
                                                 condition = c(
                                                   party_elected_short = "CDU/CSU",
                                                   state = "BE", 
                                                   which_federal_election = 20),
                                                 vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Age", model = "Model 4")

age_pred_fit5 <-  as_tibble(ggeffects::ggpredict(fit5, terms = list(age = seq(19,85, 0.1)),
                                                 condition = c(
                                                   party_elected_short = "CDU/CSU",
                                                   state = "BE", 
                                                   which_federal_election = 20),
                                                 vcov.type = "HC1"))%>%
  dplyr::mutate(variable = "Age", model = "Model 5")

age_pred_fit6 <-  as_tibble(ggeffects::ggpredict(fit6, terms = list(age = seq(19,85, 0.1)),
                                                 condition = c(
                                                   party_elected_short = "CDU/CSU",
                                                   state = "BE", 
                                                   which_federal_election = 20),
                                                 vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Age", model = "Model 6")


# Predicted values of each logit model for seniority 1 to 12 (in steps of 1),
# with party_elected_short fixed to "CDU/CSU", state to "BE" and
# which_federal_election to 20, requesting HC1 robust standard errors.
# Each result is tagged with variable = "Seniority" and its model label.
# The `variable` label must be spelled identically in all six tables because
# it is later used as a facet key; Model 2 previously said "seniority".
seniority_pred_fit1 <- as_tibble(ggeffects::ggpredict(fit1, terms = list(seniority = seq(1,12,1)),
                                                      condition = c(
                                                        party_elected_short = "CDU/CSU",
                                                        state = "BE", 
                                                        which_federal_election = 20),
                                                      vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Seniority", model = "Model 1")

seniority_pred_fit2 <-  as_tibble(ggeffects::ggpredict(fit2, terms = list(seniority = seq(1,12,1)),
                                                       condition = c(
                                                         party_elected_short = "CDU/CSU",
                                                         state = "BE", 
                                                         which_federal_election = 20),
                                                       vcov.type = "HC1"))%>%
  dplyr::mutate(variable = "Seniority", model = "Model 2")

seniority_pred_fit3 <-  as_tibble(ggeffects::ggpredict(fit3, terms = list(seniority = seq(1,12,1)),
                                                       condition = c(
                                                         party_elected_short = "CDU/CSU",
                                                         state = "BE", 
                                                         which_federal_election = 20),
                                                       vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Seniority", model = "Model 3")

# Models 4-6 are the fits on the sample without HH, HB and SL.
seniority_pred_fit4 <-  as_tibble(ggeffects::ggpredict(fit4, terms = list(seniority = seq(1,12,1)),
                                                       condition = c(
                                                         party_elected_short = "CDU/CSU",
                                                         state = "BE", 
                                                         which_federal_election = 20),
                                                       vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Seniority", model = "Model 4")

seniority_pred_fit5 <-  as_tibble(ggeffects::ggpredict(fit5, terms = list(seniority = seq(1,12,1)),
                                                       condition = c(
                                                         party_elected_short = "CDU/CSU",
                                                         state = "BE", 
                                                         which_federal_election = 20),
                                                       vcov.type = "HC1"))%>%
  dplyr::mutate(variable = "Seniority", model = "Model 5")

seniority_pred_fit6 <-  as_tibble(ggeffects::ggpredict(fit6, terms = list(seniority = seq(1,12,1)),
                                                       condition = c(
                                                         party_elected_short = "CDU/CSU",
                                                         state = "BE", 
                                                         which_federal_election = 20),
                                                       vcov.type = "HC1")) %>%
  dplyr::mutate(variable = "Seniority", model = "Model 6")


# Stack the per-model prediction tables (one for age, one for seniority).
age_pred_fit <- rbind.data.frame(
  age_pred_fit1, age_pred_fit2, age_pred_fit3,
  age_pred_fit4, age_pred_fit5, age_pred_fit6
)

seniority_pred_fit <- rbind.data.frame(
  seniority_pred_fit1, seniority_pred_fit2, seniority_pred_fit3,
  seniority_pred_fit4, seniority_pred_fit5, seniority_pred_fit6
)

# Predicted probability by age, one panel per model: solid line for the
# prediction, dashed lines for the confidence bounds. The x-axis title is
# left blank.
pred_age_plot <- ggplot(data = age_pred_fit, mapping = aes(x = x)) +
  geom_line(mapping = aes(y = predicted)) +
  geom_line(mapping = aes(y = conf.low), linetype = 2) +
  geom_line(mapping = aes(y = conf.high), linetype = 2) +
  facet_wrap(~model) +
  theme_minimal() +
  xlab("") +
  ylab("Predicted Probability") +
  ggtitle("Predicted Probability of Level-Hopping Attempts by Age") +
  theme(plot.title = element_text(hjust = 0.5))

# Same layout for seniority, with one axis break per term count.
pred_sen_plot <- ggplot(data = seniority_pred_fit, mapping = aes(x = x)) +
  geom_line(mapping = aes(y = predicted)) +
  geom_line(mapping = aes(y = conf.low), linetype = 2) +
  geom_line(mapping = aes(y = conf.high), linetype = 2) +
  facet_wrap(~model) +
  theme_minimal() +
  xlab("") +
  ylab("Predicted Probability") +
  ggtitle("Predicted Probability of Level-Hopping Attempts by Seniority") +
  scale_x_continuous(breaks = 1:12) +
  theme(plot.title = element_text(hjust = 0.5))



# Average marginal effect of age in each model, evaluated with age set to
# 20, 30, ..., 80, using that model's HC1 robust covariance matrix.
me1_age <- summary(margins::margins(fit1, variables = "age", 
                                    at = list(age = seq(20,80, by = 10)),
                                    vcov = robust_se_fit1))

me2_age <- summary(margins::margins(fit2, variables = "age", 
                                    at = list(age = seq(20,80, by = 10)),
                                    vcov = robust_se_fit2))

me3_age <- summary(margins::margins(fit3, variables = "age", 
                                    at = list(age = seq(20,80, by = 10)),
                                    vcov = robust_se_fit3))

me4_age <- summary(margins::margins(fit4, variables = "age", 
                                    at = list(age = seq(20,80, by = 10)),
                                    vcov = robust_se_fit4))

me5_age <- summary(margins::margins(fit5, variables = "age", 
                                    at = list(age = seq(20,80, by = 10)),
                                    vcov = robust_se_fit5))

me6_age <- summary(margins::margins(fit6, variables = "age", 
                                    at = list(age = seq(20,80, by = 10)),
                                    vcov = robust_se_fit6))

# Stack the six summaries, tag each with its model, and fix the facet order.
me_age_combined <- rbind.data.frame(me1_age %>% mutate(model = "Model 1"),
                                    me2_age %>% mutate(model = "Model 2"),
                                    me3_age %>% mutate(model = "Model 3"),
                                    me4_age %>% mutate(model = "Model 4"),
                                    me5_age %>% mutate(model = "Model 5"),
                                    me6_age %>% mutate(model = "Model 6"))%>%
  dplyr::mutate(model = fct_relevel(model, c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6")))

# Dot-and-interval plot: one row per evaluated age, one panel per model, with
# a dashed reference line at zero.
# NOTE(review): the title refers to "table 5" while the regression-table
# comment earlier in this file says "table 4 in paper" - confirm which is right.
ame_age_plot <- ggplot(data = me_age_combined, mapping = aes(x = AME, y = as.factor(age))) + 
  geom_point() + geom_vline(xintercept = 0, lty = 2, lwd = 0.25) + facet_wrap(~model) + 
  geom_errorbar(aes(xmin=lower,xmax=upper), width = 0) + theme_minimal() + 
  ylab("") +   ggtitle("Average Marginal Effect of Age \n (Models 1-6 in table 5)") + 
  theme(plot.title = element_text(hjust = 0.5)) + xlab("Average Marginal Effect of Age (+1 year) \n at Different Values of Age")



# Average marginal effect of seniority in each model, evaluated with
# seniority set to 1, 2, ..., 7, using that model's HC1 robust covariance
# matrix.
me1_seniority <- summary(margins::margins(fit1, variables = "seniority", 
                                          at = list(seniority = seq(1,7, by = 1)),
                                          vcov = robust_se_fit1))

me2_seniority <- summary(margins::margins(fit2, variables = "seniority", 
                                          at = list(seniority = seq(1,7, by = 1)),
                                          vcov = robust_se_fit2))

me3_seniority <- summary(margins::margins(fit3, variables = "seniority", 
                                          at = list(seniority = seq(1,7, by = 1)),
                                          vcov = robust_se_fit3))

me4_seniority <- summary(margins::margins(fit4, variables = "seniority", 
                                          at = list(seniority = seq(1,7, by = 1)),
                                          vcov = robust_se_fit4))

me5_seniority <- summary(margins::margins(fit5, variables = "seniority", 
                                          at = list(seniority = seq(1,7, by = 1)),
                                          vcov = robust_se_fit5))

me6_seniority <- summary(margins::margins(fit6, variables = "seniority", 
                                          at = list(seniority = seq(1,7, by = 1)),
                                          vcov = robust_se_fit6))

# Stack the six summaries, tag each with its model, and fix the facet order.
me_seniority_combined <- rbind.data.frame(me1_seniority %>% mutate(model = "Model 1"),
                                          me2_seniority %>% mutate(model = "Model 2"),
                                          me3_seniority %>% mutate(model = "Model 3"),
                                          me4_seniority %>% mutate(model = "Model 4"),
                                          me5_seniority %>% mutate(model = "Model 5"),
                                          me6_seniority %>% mutate(model = "Model 6"))%>%
  dplyr::mutate(model = fct_relevel(model, c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6")))

# Dot-and-interval plot: one row per evaluated seniority value, one panel per
# model, with a dashed reference line at zero.
# NOTE(review): the title refers to "table 5" while the regression-table
# comment earlier in this file says "table 4 in paper" - confirm which is right.
ame_sen_plot <- ggplot(data = me_seniority_combined, mapping = aes(x = AME, y = as.factor(seniority))) + 
  geom_point() + geom_vline(xintercept = 0, lty = 2, lwd = 0.25) + facet_wrap(~model) + 
  geom_errorbar(aes(xmin=lower,xmax=upper), width = 0) + theme_minimal() + 
  ylab("") +   ggtitle("Average Marginal Effect of Seniority \n (Models 1-6 in table 5)") + 
  theme(plot.title = element_text(hjust = 0.5)) + xlab("Average Marginal Effect of Seniority (+1 term) \n at Different Values of Seniority")


# Stack the predicted-probability panels: age on top, seniority below.
pred_agesen_plot <- pred_age_plot / pred_sen_plot
# Display the plot when the script is run interactively.
pred_agesen_plot
# produces first plot in appendix C
# NOTE(review): the original comment called this output a "table"; the file
# written here is a PNG of the stacked plots - confirm the paper's wording.
# The plot is passed explicitly so the saved file does not depend on which
# plot was created most recently.
ggsave("pred_agesen_plot.png", plot = pred_agesen_plot, width = 9, height = 12)


# Stack the average-marginal-effect panels: age on top, seniority below.
ame_agesen_plot <- ame_age_plot / ame_sen_plot
ame_agesen_plot
# produces second plot in appendix C (see the note above on "table" vs plot)
ggsave("ame_agesen_plot.png", plot = ame_agesen_plot, width = 9, height = 12)


# Logistic Model AME (figure 6 in paper)

# Model 1 predictions only, for both seniority and age.
both_pred_fit <- rbind.data.frame(seniority_pred_fit, age_pred_fit) %>%
  dplyr::filter(model == "Model 1")

# Lower panel: Model 1 predicted probability against age and seniority, one
# facet per variable with its own x scale; the facet strips sit below the
# axes and act as axis titles.
pred_age_sen_mod1 <- ggplot(data = both_pred_fit, mapping = aes(x = x)) +
  geom_line(mapping = aes(y = predicted)) +
  geom_line(mapping = aes(y = conf.low), linetype = 2) +
  geom_line(mapping = aes(y = conf.high), linetype = 2) +
  theme_minimal() +
  facet_wrap(~variable, scales = "free_x", strip.position = "bottom") +
  theme(strip.placement = "outside") +
  ggh4x::facetted_pos_scales(x = list(
    variable == "Age" ~ scale_x_continuous(breaks = seq(20, 85, by = 5)),
    variable == "Seniority" ~ scale_x_continuous(breaks = seq(1, 12, by = 1))
  )) +
  xlab("") +
  ylab("Predicted Probability") +
  ggtitle("Predicted Probability of Level-Hopping Attempt, \nConditional on Age and Seniority (Model 1)") +
  theme(plot.title = element_text(hjust = 0.5))

#AME plot
# Upper panel: average marginal effects with their intervals, one facet per
# model, and a dashed reference line at zero.
ame_logit <- ggplot(data = me_combined,
                    mapping = aes(x = AME, y = factor, group = model)) +
  geom_point(position = position_dodge(.65)) +
  geom_errorbar(mapping = aes(xmin = lower, xmax = upper),
                position = position_dodge(.65), width = 0) +
  theme_minimal() +
  xlab("Average Marginal Effects") +
  ylab("") +
  geom_vline(xintercept = 0, lty = 2, lwd = 0.25) +
  facet_wrap(~model) +
  ggtitle("Average Marginal Effects (Models 1-6)") +
  scale_x_continuous(breaks = c(-0.04, -0.02, 0.00, 0.02, 0.04)) +
  theme(plot.title = element_text(hjust = 0.5))


# Layout grid of five rows by five columns: the AME panel takes rows 1-4 and
# the prediction panel takes row 5.
design <- c(patchwork::area(t = 1, l = 1, b = 4, r = 5),
            patchwork::area(t = 5, l = 1, b = 5, r = 5))

ame_logit / pred_age_sen_mod1 +
  plot_layout(design = design)

# produces figure 6
ggsave("ame_logit_plot.png", width = 9, height = 12)

# Main multinomial logit

# One identifier per legislator-by-federal-election observation.
landtag_subset$index <- paste(landtag_subset$entry_id, landtag_subset$which_federal_election)

# Three-category outcome. The "Ran Insecure" assignment runs last, so a row
# flagged as both secure and insecure ends up as "Ran Insecure".
landtag_subset$ran_type <- "Didn't Run"
landtag_subset$ran_type[landtag_subset$ran_secure == 1] <- "Ran Secure"
landtag_subset$ran_type[landtag_subset$ran_insecure == 1] <- "Ran Insecure"

# Wide-format choice data for mlogit; "Didn't Run" is listed first among the
# alternatives.
data <- dfidx(data = landtag_subset,
              idx = list(c("index", "which_federal_election")), choice="ran_type",
              shape="wide", 
              alt.levels = c("Didn't Run","Ran Secure","Ran Insecure"))

# Full-sample model: only observation-level covariates (right of `|`).
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
fit7 <- mlogit(ran_type ~ 1 | tier_dummy +
                 exec_position +
                 legislative_position +
                 party_position +
                 pty_in_govt + 
                 age + I(age^2)+
                 seniority + I(seniority^2) + 
                 pv + 
                 time_since_last_election_years +
                 gender +
                 party_elected_short + 
                 state + 
                 as.factor(which_federal_election), 
               data = data, tol = 0.00001, 
               unscaled = TRUE)
summary(fit7)

# Same specification without the states HH, HB and SL.
data2 <- dfidx(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")),
               idx = list(c("index", "which_federal_election")), choice="ran_type",
               shape="wide", 
               alt.levels = c("Didn't Run","Ran Secure","Ran Insecure"))

fit8 <- mlogit(ran_type ~ 1 | tier_dummy +
                 exec_position +
                 legislative_position +
                 party_position +
                 pty_in_govt + 
                 age + I(age^2)+
                 seniority + I(seniority^2) + 
                 pv + 
                 time_since_last_election_years +
                 gender +
                 party_elected_short + 
                 state + 
                 as.factor(which_federal_election), 
               data = data2, tol = 0.00001, 
               unscaled = TRUE)
summary(fit8)

# Console inspection: sandwich covariance matrix and text tables with and
# without sandwich standard errors.
sandwich(fit8)
stargazer(fit8, coeftest(fit8, sandwich(fit8)), type = "text")
stargazer(fit8, type = "text")

# produces table 5
# presentation of table estimates adjusted manually from stargazer output
stargazer(fit7, coeftest(fit7, sandwich(fit7)), digits = 3, single.row = TRUE)
stargazer(fit8, coeftest(fit8, sandwich(fit8)), digits = 3, single.row = TRUE)

# zero-inflated beta regression

# Keep a row unless it is (federal election 13 or 14 AND one of the listed
# states) or (federal election 19 AND party AfD).
excluded_states_13_14 <- c("BB", "BE", "MV", "SN", "ST", "TH")
landtag_subset2 <- landtag_subset %>%
  dplyr::filter(which_federal_election %notin% c(13, 14) |
                  state %notin% excluded_states_13_14) %>%
  dplyr::filter(which_federal_election %notin% c(19) |
                  party_elected_short %notin% c("AfD"))

# Candidates of the two small-party groups who ran but have no joint
# probability get the minimum value.
small_party_no_prob <- landtag_subset2$ran == 1 &
  landtag_subset2$party_elected_short %in% c("Minor Parties",
                                             "Other Rad. Right (DVU/NPD/REP)") &
  is.na(landtag_subset2$joint_prob)
landtag_subset2$joint_prob[small_party_no_prob] <- 0.0001

# Response: joint probability clamped to [0.0001, 0.9999], and exactly 0 for
# legislators who did not run.
landtag_subset2$joint_prob2 <- pmin(pmax(landtag_subset2$joint_prob, 0.0001), 0.9999)
landtag_subset2$joint_prob2[landtag_subset2$ran == 0] <- 0

# Drop rows with a missing response or a missing value in the listed covariates.
landtag_subset2 <- landtag_subset2 %>%
  dplyr::filter(
    !is.na(joint_prob2), !is.na(tier_dummy), !is.na(party_position),
    !is.na(legislative_position), !is.na(exec_position),
    !is.na(pty_in_govt), !is.na(age), !is.na(seniority),
    !is.na(pv), !is.na(time_since_last_election),
    !is.na(party_elected_short), !is.na(state), !is.na(which_federal_election)
  )

# Categorical covariates become factors (via character, so levels are rebuilt
# from the values present after filtering).
for (factor_col in c("which_federal_election", "tier_dummy", "party_position",
                     "legislative_position", "exec_position", "pty_in_govt",
                     "pv")) {
  landtag_subset2[[factor_col]] <- as.factor(as.character(landtag_subset2[[factor_col]]))
}

# Continuous covariates are standardised.
landtag_subset2$age_scaled <- as.numeric(scale(landtag_subset2$age))
landtag_subset2$seniority_scaled <- as.numeric(scale(landtag_subset2$seniority))
landtag_subset2$time_since_last_election_scaled <- as.numeric(scale(landtag_subset2$time_since_last_election))

# "SMD" is the reference category of the tier dummy.
landtag_subset2$tier_dummy <- relevel(landtag_subset2$tier_dummy, ref = "SMD")

# Data set holding only the response and the covariates used in the model.
landtag_subset3 <- landtag_subset2 %>%
  dplyr::select(
    joint_prob2, tier_dummy, party_position, legislative_position,
    exec_position, pty_in_govt, age_scaled, seniority_scaled,
    pv, time_since_last_election_scaled, gender,
    party_elected_short, state, which_federal_election
  )

# Zero-inflated beta model (family BEZI) of joint_prob2, fitted with the RS
# algorithm and up to 150 iterations. Three formulas are passed positionally:
# the first carries the response; the second and third are one-sided and repeat
# the same right-hand side.
# NOTE(review): by gamlss()'s argument order the second and third formulas
# should be sigma.formula and nu.formula — confirm against the gamlss docs.
fit_beinf <- gamlss(joint_prob2 ~ tier_dummy + party_position + legislative_position + 
                      exec_position + pty_in_govt + age_scaled + I(age_scaled^2) + seniority_scaled + I(seniority_scaled^2) +
                      pv + time_since_last_election_scaled + gender + 
                      party_elected_short + state + which_federal_election,
                    ~ tier_dummy + party_position + legislative_position + 
                      exec_position + pty_in_govt + age_scaled + I(age_scaled^2) + seniority_scaled + I(seniority_scaled^2) +
                      pv + time_since_last_election_scaled + gender + 
                      party_elected_short + state + which_federal_election,
                    ~ tier_dummy + party_position + legislative_position + 
                      exec_position + pty_in_govt + age_scaled + I(age_scaled^2) + seniority_scaled + I(seniority_scaled^2) +
                      pv + time_since_last_election_scaled + gender + 
                      party_elected_short + state + which_federal_election,
                    data = landtag_subset3,
                    family = BEZI(), method = RS(150)
)


# Coefficient table of the zero-inflated beta model, also cached to disk.
su_beinf <- summary(fit_beinf)

write_rds(su_beinf, "summary_bize_mu_26042023.rds")

# Rows of the summary table used for the reported mu and nu coefficients
# (13 rows each). NOTE(review): the positions are hard-coded and assume the
# row layout of this particular fit — confirm if the specification changes.
mu_rows <- 1:13
nu_rows <- 89:101

# Significance markers from p-values: *** < 0.01, ** < 0.05, * < 0.1.
signif_stars <- function(p) {
  ifelse(p < 0.01, "***",
         ifelse(p < 0.05, "**",
                ifelse(p < 0.1, "*", "")))
}

pvalues_mu <- signif_stars(su_beinf[mu_rows, 4])

pvalues_nu <- signif_stars(su_beinf[nu_rows, 4])


# Columns: term name, nu estimate with flipped sign, mu estimate; each cell is
# "estimate + stars (std. error)".
# NOTE(review): the sign flip presumably turns the effect on a zero outcome
# into the effect on running — confirm.
beinf_table <- xtable(
  cbind(
    rownames(su_beinf)[mu_rows],
    paste0(-round(su_beinf[nu_rows, 1], 3), pvalues_nu, " (",
           round(su_beinf[nu_rows, 2], 3), ")"),
    paste0(round(su_beinf[mu_rows, 1], 3), pvalues_mu, " (",
           round(su_beinf[mu_rows, 2], 3), ")")
  )
)

# produces table 6
print(beinf_table, include.rownames = FALSE)

# Average marginal effects of the listed covariates on the nu and mu
# parameters of the zero-inflated beta model; both results are cached to disk.
beinf_ame_terms <- c("tier_dummy", "party_position", "legislative_position",
                     "exec_position", "pty_in_govt", "pv", "gender")

me_nu3 <- marginaleffects::avg_slopes(fit_beinf, what = "nu", variables = beinf_ame_terms)
me_mu3 <- marginaleffects::avg_slopes(fit_beinf, what = "mu", variables = beinf_ame_terms)

write_rds(me_nu3, "marg_eff_bize_nu3_26042023.rds")
write_rds(me_mu3, "marg_eff_bize_mu3_26042023.rds")

# Stack the nu and mu marginal effects into one table for plotting.
# The nu effects are sign-flipped. Negating an interval reverses its ends, so
# the new lower bound is -conf.high and the new upper bound is -conf.low;
# negating each bound in place would leave conf.low above conf.high.
ame_beinf <- rbind.data.frame(
  as_tibble(me_nu3) %>% dplyr::select(term, estimate, conf.low, conf.high) %>% 
    dplyr::mutate(estimate = -estimate,
                  flipped_low = -conf.high,
                  conf.high = -conf.low,
                  conf.low = flipped_low,
                  flipped_low = NULL) %>%
    dplyr::mutate(parameter = "Effect on Probability of \n Level-Hopping Attempt"),
  
  as_tibble(me_mu3) %>% dplyr::select(term, estimate, conf.low, conf.high) %>% 
    dplyr::mutate(parameter = "Effect on Candidacy Quality (Prospective \nProbability of Level-Hopping)")
) %>%
  # Replace variable names with display labels; unmatched terms are kept as is.
  dplyr::mutate(term = case_match(term, 
                                  "exec_position" ~ "Executive Position", 
                                  "gender" ~ "Gender (male)",
                                  "legislative_position" ~ "Legislative Position",
                                  "party_position" ~ "Party Leadership Position",
                                  "pty_in_govt" ~ "Party in State Govt.",
                                  "pv" ~ "Preferential Voting", 
                                  "tier_dummy" ~ "List PR",
                                  .default = term)) 

# Fix the order of the terms on the plot axis.
ame_beinf <- ame_beinf %>%
  dplyr::mutate(term = fct_relevel(term, c("Gender (male)", "Preferential Voting",
                                           "Party in State Govt.", "Legislative Position", 
                                           "Executive Position", "Party Leadership Position", "List PR")))

# Make the probability-of-attempt panel the first facet.
ame_beinf$parameter <- relevel(as.factor(ame_beinf$parameter), ref = "Effect on Probability of \n Level-Hopping Attempt")

# Point estimates with interval bars for each term, one facet per model
# parameter. The plot is named and passed to ggsave() explicitly so the saved
# file does not depend on the implicit "last plot"; geom_point() inherits its
# aesthetics from the top-level mapping.
beinf_ame_plot <- ggplot(data = ame_beinf, mapping = aes(x = estimate, y = term, group = parameter)) +
  geom_point() +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0) +
  theme_minimal() +
  facet_wrap(~parameter, scales = "free_x") +
  xlab("Average Marginal Effects") +
  ylab("") +
  geom_vline(xintercept = 0, lty = 2, lwd = 0.25) +
  ggtitle("Average Marginal Effects (Zero-Inflated Beta Regression)") +
  theme(plot.title = element_text(hjust = 0.5))
beinf_ame_plot

# produces figure 7
ggsave("beinf_ame_plot.png", plot = beinf_ame_plot)

#### Robustness Checks ####

# RC with different operationalisation of legislators' position
# first table (logit) and fourth table (multinomial) in appendix D

# Robustness check 1, full sample. Logit models of `ran` in which the position
# covariates are deputy_or_cw, party_leader, chair and pres_or_vp (alongside
# exec_position). Each model is followed by HC1 heteroskedasticity-robust
# standard errors and a stargazer table of the robust coefficient test.

# Model 1: additive state, party and federal-election effects.
fit1_rc1 <- glm(data = landtag_subset, ran  ~ 
                  tier_dummy +
                  deputy_or_cw +
                  party_leader +
                  chair +
                  pres_or_vp +
                  exec_position +
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_since_last_election_years +
                  gender +
                  state + party_elected_short +
                  as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit1_rc1)

robust_se_fit1_rc1 <- vcovHC(fit1_rc1, type = 'HC1')
fit1_rc1_robust_se <- coeftest(fit1_rc1, robust_se_fit1_rc1)
stargazer(fit1_rc1_robust_se, digits = 3, single.row = TRUE)

# Model 2: state-by-party interaction.
fit2_rc1 <- glm(data = landtag_subset, ran  ~ 
                  tier_dummy +
                  deputy_or_cw +
                  party_leader +
                  chair +
                  pres_or_vp +
                  exec_position +
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_since_last_election_years +
                  gender +
                  state*party_elected_short + as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit2_rc1)

robust_se_fit2_rc1 <- vcovHC(fit2_rc1, type = 'HC1')
fit2_rc1_robust_se <- coeftest(fit2_rc1, robust_se_fit2_rc1)
stargazer(fit2_rc1_robust_se, digits = 3, single.row = TRUE)

# Model 3: party-by-federal-election interaction.
fit3_rc1 <- glm(data = landtag_subset, ran  ~ 
                  tier_dummy +
                  deputy_or_cw +
                  party_leader +
                  chair +
                  pres_or_vp +
                  exec_position +
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_since_last_election_years +
                  gender +
                  state + party_elected_short*as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit3_rc1)

robust_se_fit3_rc1 <- vcovHC(fit3_rc1, type = 'HC1')
fit3_rc1_robust_se <- coeftest(fit3_rc1, robust_se_fit3_rc1)
stargazer(fit3_rc1_robust_se, digits = 3, single.row = TRUE)

# Robustness check 1, restricted sample: the same three specifications fitted
# without the states HH, HB and SL, each with HC1 robust standard errors.

# Model 4: additive state, party and federal-election effects.
fit4_rc1 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
                  tier_dummy +
                  deputy_or_cw +
                  party_leader +
                  chair +
                  pres_or_vp +
                  exec_position +
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_since_last_election_years +
                  gender +
                  state + party_elected_short +
                  as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit4_rc1)

robust_se_fit4_rc1 <- vcovHC(fit4_rc1, type = 'HC1')
fit4_rc1_robust_se <- coeftest(fit4_rc1, robust_se_fit4_rc1)
stargazer(fit4_rc1_robust_se, digits = 3, single.row = TRUE)

# Model 5: state-by-party interaction.
fit5_rc1 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
                  tier_dummy +
                  deputy_or_cw +
                  party_leader +
                  chair +
                  pres_or_vp +
                  exec_position +
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_since_last_election_years +
                  gender +
                  state*party_elected_short + as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit5_rc1)

robust_se_fit5_rc1 <- vcovHC(fit5_rc1, type = 'HC1')
fit5_rc1_robust_se <- coeftest(fit5_rc1, robust_se_fit5_rc1)
stargazer(fit5_rc1_robust_se, digits = 3, single.row = TRUE)

# Model 6: party-by-federal-election interaction.
fit6_rc1 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
                  tier_dummy +
                  deputy_or_cw +
                  party_leader +
                  chair +
                  pres_or_vp +
                  exec_position +
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_since_last_election_years +
                  gender +
                  state + party_elected_short*as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit6_rc1)

robust_se_fit6_rc1 <- vcovHC(fit6_rc1, type = 'HC1')
fit6_rc1_robust_se <- coeftest(fit6_rc1, robust_se_fit6_rc1)
stargazer(fit6_rc1_robust_se, digits = 3, single.row = TRUE)

# Combined tables: full-sample models 1-3 and restricted-sample models 4-6.
# NOTE(review): these use digits = 2 while the per-model tables use 3 — confirm
# this is intended.
stargazer(fit1_rc1_robust_se, fit2_rc1_robust_se, fit3_rc1_robust_se, digits = 2, single.row = TRUE)
stargazer(fit4_rc1_robust_se, fit5_rc1_robust_se, fit6_rc1_robust_se, digits = 2, single.row = TRUE)            

# Nagelkerke pseudo-R2 of each model, rounded to three digits.
c(round(DescTools::PseudoR2(fit1_rc1, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit2_rc1, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit3_rc1, which = "Nagelkerke"),3))
c(round(DescTools::PseudoR2(fit4_rc1, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit5_rc1, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit6_rc1, which = "Nagelkerke"),3))


# Robustness check 1, multinomial logit on the full sample: same three-way
# outcome as the main model, with the position covariates deputy_or_cw,
# party_leader, chair and pres_or_vp. TRUE/FALSE are spelled out because
# `T`/`F` are ordinary, reassignable variables.
fit7_rc1 <- mlogit(ran_type ~ 1 | tier_dummy +
                     deputy_or_cw +
                     party_leader +
                     chair +
                     pres_or_vp +
                     exec_position +
                     pty_in_govt + 
                     age + I(age^2)+
                     seniority + I(seniority^2) + 
                     pv + time_since_last_election_years +
                     gender +
                     party_elected_short + 
                     state + 
                     as.factor(which_federal_election), 
                   data = data, tol = 0.00001, 
                   unscaled = TRUE)
summary(fit7_rc1)

# Console inspection: sandwich covariance matrix and text tables.
sandwich(fit7_rc1)
stargazer(fit7_rc1, coeftest(fit7_rc1, sandwich(fit7_rc1)), type = "text")
stargazer(fit7_rc1, type = "text")

# Choice data without the states HH, HB and SL.
data2 <- dfidx(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")),
               idx = list(c("index", "which_federal_election")), choice="ran_type",
               shape="wide", 
               alt.levels = c("Didn't Run","Ran Secure","Ran Insecure"))

# Same specification on the restricted sample.
fit8_rc1 <- mlogit(ran_type ~ 1 | tier_dummy +
                     deputy_or_cw +
                     party_leader +
                     chair +
                     pres_or_vp +
                     exec_position +
                     pty_in_govt + 
                     age + I(age^2)+
                     seniority + I(seniority^2) + 
                     pv + time_since_last_election_years +
                     gender +
                     party_elected_short + 
                     state + 
                     as.factor(which_federal_election), 
                   data = data2, tol = 0.00001, 
                   unscaled = TRUE)
summary(fit8_rc1)

sandwich(fit8_rc1)
stargazer(fit8_rc1, coeftest(fit8_rc1, sandwich(fit8_rc1)), type = "text")
stargazer(fit8_rc1, type = "text")

# LaTeX tables for the robustness-check models. These two calls previously
# re-printed the main models fit7 / fit8, which are already tabulated where
# they are fitted.
stargazer(fit7_rc1, coeftest(fit7_rc1, sandwich(fit7_rc1)), digits = 3, single.row = TRUE)
stargazer(fit8_rc1, coeftest(fit8_rc1, sandwich(fit8_rc1)), digits = 3, single.row = TRUE)


# Coefficient tests with sandwich standard errors.
fit7_rc1_rse <- coeftest(fit7_rc1, sandwich(fit7_rc1))
fit8_rc1_rse <- coeftest(fit8_rc1, sandwich(fit8_rc1))

# Each model is passed twice, giving four columns in the LaTeX table.
# NOTE(review): presumably one column per non-baseline outcome, arranged by
# hand afterwards — confirm.
stargazer(fit7_rc1_rse, fit7_rc1_rse, fit8_rc1_rse, fit8_rc1_rse, digits = 3, single.row = FALSE, type = "latex")

# RC with time to election instead of time since election
# second table (logit) and fifth table (multinom) in appendix D

# Code the maximum length (in years) of each state legislative term and, from
# it, the time between each federal election and the next scheduled state
# election: (state election date + full term) - federal election date.

# States coded with the same term length for every legislature.
fixed_term_length <- c(HB = 4, NW = 5, RP = 5, SL = 5)

# Remaining states: number of the first legislature coded with a five-year
# term; all earlier legislatures are coded with four years.
first_five_year_legislature <- c(
  BW = 11, BY = 13, BE = 14, BB = 2, HH = 21, HE = 16,
  MV = 5, NI = 14, SH = 15, SN = 2, ST = 5, TH = 2
)

# Look both tables up by state. as.character() guards against factor codes
# being used as positions; states in neither table (or NA) yield NA.
state_key <- as.character(landtag_subset$state)
fixed_length <- unname(fixed_term_length[state_key])
switch_legislature <- unname(first_five_year_legislature[state_key])

# A missing legislature number only matters for states whose term length
# changed; it leaves max_term_length NA there.
landtag_subset$max_term_length <- ifelse(
  !is.na(fixed_length),
  fixed_length,
  ifelse(landtag_subset$legislature >= switch_legislature, 5, 4)
)

# Date of the federal election each row refers to (Bundestag 11 to 20); rows
# with any other value of which_federal_election get NA.
federal_election_dates <- dmy(c(
  election_11_bundestag, election_12_bundestag, election_13_bundestag,
  election_14_bundestag, election_15_bundestag, election_16_bundestag,
  election_17_bundestag, election_18_bundestag, election_19_bundestag,
  election_20_bundestag
))
federal_date <- federal_election_dates[
  match(landtag_subset$which_federal_election, 11:20)
]

# Scheduled end of the state term, using 365-day years as an approximation.
scheduled_state_election <- dmy(landtag_subset$election_date) +
  365 * landtag_subset$max_term_length

# Days from the federal election to the scheduled next state election
# (negative if that date precedes the federal election), and the same in years.
landtag_subset$time_to_next_election <- as.numeric(
  scheduled_state_election - federal_date,
  units = "days"
)

landtag_subset$time_to_next_election_years <- landtag_subset$time_to_next_election/365


# Robustness check 2, full sample. Logit models of `ran` that use
# time_to_next_election_years as the timing covariate. Each model is followed
# by HC1 heteroskedasticity-robust standard errors and a stargazer table of the
# robust coefficient test.

# Model 1: additive state, party and federal-election effects.
fit1_rc2 <- glm(data = landtag_subset, ran  ~ 
                  tier_dummy +
                  exec_position +
                  legislative_position + 
                  party_position + 
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_to_next_election_years +
                  gender +
                  state + party_elected_short +
                  as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit1_rc2)

robust_se_fit1_rc2 <- vcovHC(fit1_rc2, type = 'HC1')
fit1_rc2_robust_se <- coeftest(fit1_rc2, robust_se_fit1_rc2)
stargazer(fit1_rc2_robust_se, digits = 3, single.row = TRUE)

# Model 2: state-by-party interaction.
fit2_rc2 <- glm(data = landtag_subset, ran  ~ 
                  tier_dummy +
                  exec_position +
                  legislative_position + 
                  party_position + 
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_to_next_election_years +
                  gender +
                  state*party_elected_short + as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit2_rc2)

robust_se_fit2_rc2 <- vcovHC(fit2_rc2, type = 'HC1')
fit2_rc2_robust_se <- coeftest(fit2_rc2, robust_se_fit2_rc2)
stargazer(fit2_rc2_robust_se, digits = 3, single.row = TRUE)

# Model 3: party-by-federal-election interaction.
fit3_rc2 <- glm(data = landtag_subset, ran  ~ 
                  tier_dummy +
                  exec_position +
                  legislative_position + 
                  party_position + 
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_to_next_election_years +
                  gender +
                  state + party_elected_short*as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit3_rc2)

robust_se_fit3_rc2 <- vcovHC(fit3_rc2, type = 'HC1')
fit3_rc2_robust_se <- coeftest(fit3_rc2, robust_se_fit3_rc2)
stargazer(fit3_rc2_robust_se, digits = 3, single.row = TRUE)

# Robustness check 2, restricted sample: the same three specifications fitted
# without the states HH, HB and SL, each with HC1 robust standard errors.

# Model 4: additive state, party and federal-election effects.
fit4_rc2 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
                  tier_dummy +
                  exec_position +
                  legislative_position + 
                  party_position + 
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_to_next_election_years +
                  gender +
                  state + party_elected_short +
                  as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit4_rc2)

robust_se_fit4_rc2 <- vcovHC(fit4_rc2, type = 'HC1')
fit4_rc2_robust_se <- coeftest(fit4_rc2, robust_se_fit4_rc2)
stargazer(fit4_rc2_robust_se, digits = 3, single.row = TRUE)

# Model 5: state-by-party interaction.
fit5_rc2 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
                  tier_dummy +
                  exec_position +
                  legislative_position + 
                  party_position + 
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_to_next_election_years +
                  gender +
                  state*party_elected_short + as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit5_rc2)

robust_se_fit5_rc2 <- vcovHC(fit5_rc2, type = 'HC1')
fit5_rc2_robust_se <- coeftest(fit5_rc2, robust_se_fit5_rc2)
stargazer(fit5_rc2_robust_se, digits = 3, single.row = TRUE)

# Model 6: party-by-federal-election interaction.
fit6_rc2 <- glm(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")), ran  ~ 
                  tier_dummy +
                  exec_position +
                  legislative_position + 
                  party_position + 
                  pty_in_govt + 
                  age + I(age^2) +
                  seniority + I(seniority^2) + 
                  pv + time_to_next_election_years +
                  gender +
                  state + party_elected_short*as.factor(which_federal_election)
                , family = binomial(link = "logit"))
summary(fit6_rc2)

robust_se_fit6_rc2 <- vcovHC(fit6_rc2, type = 'HC1')
fit6_rc2_robust_se <- coeftest(fit6_rc2, robust_se_fit6_rc2)
stargazer(fit6_rc2_robust_se, digits = 3, single.row = TRUE)

# Combined tables: full-sample models 1-3 and restricted-sample models 4-6.
stargazer(fit1_rc2_robust_se, fit2_rc2_robust_se, fit3_rc2_robust_se, digits = 3, single.row = T)
stargazer(fit4_rc2_robust_se, fit5_rc2_robust_se, fit6_rc2_robust_se, digits = 3, single.row = T)

# Nagelkerke pseudo-R2 of each model, rounded to three digits.
c(round(DescTools::PseudoR2(fit1_rc2, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit2_rc2, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit3_rc2, which = "Nagelkerke"),3))
c(round(DescTools::PseudoR2(fit4_rc2, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit5_rc2, which = "Nagelkerke"),3),
  round(DescTools::PseudoR2(fit6_rc2, which = "Nagelkerke"),3))

# Number of observations used in the full-sample and restricted-sample fits.
nobs(fit1_rc2) 
nobs(fit4_rc2)

# Robustness check 2, multinomial logit. The choice data are rebuilt here so
# that they contain time_to_next_election_years, which was added to
# landtag_subset after the main-model data were created. TRUE/FALSE are
# spelled out because `T`/`F` are ordinary, reassignable variables.

# One identifier per legislator-by-federal-election observation.
landtag_subset$index <- paste(landtag_subset$entry_id, landtag_subset$which_federal_election)

# Three-category outcome. The "Ran Insecure" assignment runs last, so a row
# flagged as both secure and insecure ends up as "Ran Insecure".
landtag_subset$ran_type <- "Didn't Run"
landtag_subset$ran_type[landtag_subset$ran_secure == 1] <- "Ran Secure"
landtag_subset$ran_type[landtag_subset$ran_insecure == 1] <- "Ran Insecure"

data <- dfidx(data = landtag_subset,
              idx = list(c("index", "which_federal_election")), choice="ran_type",
              shape="wide", 
              alt.levels = c("Didn't Run","Ran Secure","Ran Insecure"))

# Full-sample model with time to the next scheduled state election.
fit7_rc2 <- mlogit(ran_type ~ 1 | tier_dummy +
                     exec_position +
                     legislative_position +
                     party_position +
                     pty_in_govt + 
                     age + I(age^2)+
                     seniority + I(seniority^2) + 
                     pv + time_to_next_election_years +
                     gender +
                     party_elected_short + 
                     state + 
                     as.factor(which_federal_election), 
                   data = data, tol = 0.00001, 
                   unscaled = TRUE)
summary(fit7_rc2)

# Console inspection: sandwich covariance matrix and text tables.
sandwich(fit7_rc2)
stargazer(fit7_rc2, coeftest(fit7_rc2, sandwich(fit7_rc2)), type = "text")
stargazer(fit7_rc2, type = "text")

# Choice data without the states HH, HB and SL.
data2 <- dfidx(data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")),
               idx = list(c("index", "which_federal_election")), choice="ran_type",
               shape="wide", 
               alt.levels = c("Didn't Run","Ran Secure","Ran Insecure"))

# Same specification on the restricted sample.
fit8_rc2 <- mlogit(ran_type ~ 1 | tier_dummy +
                     exec_position +
                     legislative_position +
                     party_position +
                     pty_in_govt + 
                     age + I(age^2)+
                     seniority + I(seniority^2) + 
                     pv + time_to_next_election_years +
                     gender +
                     party_elected_short + 
                     state + 
                     as.factor(which_federal_election), 
                   data = data2, tol = 0.00001, 
                   unscaled = TRUE)
summary(fit8_rc2)

sandwich(fit8_rc2)
stargazer(fit8_rc2, coeftest(fit8_rc2, sandwich(fit8_rc2)), type = "text")
stargazer(fit8_rc2, type = "text")


# Coefficient tests with sandwich standard errors.
fit7_rc2_rse <- coeftest(fit7_rc2, sandwich(fit7_rc2))
fit8_rc2_rse <- coeftest(fit8_rc2, sandwich(fit8_rc2))

# Each model is passed twice, giving four columns in the LaTeX table.
# NOTE(review): presumably one column per non-baseline outcome, arranged by
# hand afterwards — confirm.
stargazer(fit7_rc2_rse, fit7_rc2_rse, fit8_rc2_rse, fit8_rc2_rse, digits = 3, single.row = FALSE, type = "latex")


# Clustered standard errors at the state-election level
# third table in appendix D

# For each main logit model (fit1-fit6): cluster-robust covariance matrix,
# coefficient test based on it, and a text table next to the original fit.
# `clustered_se` is overwritten for every model.
# NOTE(review): the cluster argument has two columns (state and federal
# election). multiwayvcov::cluster.vcov() documents each column as a separate
# clustering dimension (multi-way clustering); if a single state-by-election
# cluster is intended, a combined identifier would be needed. Confirm against
# the paper before changing anything.

# Full-sample models: cluster columns taken from all of landtag_subset.
clustered_se <- cluster.vcov(fit1, cbind(landtag_subset$state, landtag_subset$which_federal_election))
fit1_clustered_se <- coeftest(fit1, clustered_se)
stargazer(fit1, fit1_clustered_se, type = "text")

clustered_se <- cluster.vcov(fit2, cbind(landtag_subset$state, landtag_subset$which_federal_election))
fit2_clustered_se <- coeftest(fit2, clustered_se)
stargazer(fit2, fit2_clustered_se, type = "text")

clustered_se <- cluster.vcov(fit3, cbind(landtag_subset$state, landtag_subset$which_federal_election))
fit3_clustered_se <- coeftest(fit3, clustered_se)
stargazer(fit3, fit3_clustered_se, type = "text")

# Models 4-6: cluster columns taken from the subset without HH, HB and SL.
# NOTE(review): assumes fit4-fit6 were fitted on this same subset so that rows
# line up — confirm where they are defined.
clustered_se <- cluster.vcov(fit4, landtag_subset %>% dplyr::filter(state %notin% c("HH", "HB", "SL")) %>%
                               dplyr::select(state, which_federal_election))
fit4_clustered_se <- coeftest(fit4, clustered_se)
stargazer(fit4, fit4_clustered_se, type = "text")

clustered_se <- cluster.vcov(fit5, landtag_subset %>% dplyr::filter(state %notin% c("HH", "HB", "SL")) %>%
                               dplyr::select(state, which_federal_election))
fit5_clustered_se <- coeftest(fit5, clustered_se)
stargazer(fit5, fit5_clustered_se, type = "text")

clustered_se <- cluster.vcov(fit6, landtag_subset %>% dplyr::filter(state %notin% c("HH", "HB", "SL")) %>%
                               dplyr::select(state, which_federal_election))
fit6_clustered_se <- coeftest(fit6, clustered_se)
stargazer(fit6, fit6_clustered_se, type = "text")

# Combined tables of the clustered coefficient tests.
stargazer(fit1_clustered_se, fit2_clustered_se, fit3_clustered_se, digits = 3, single.row = T)
stargazer(fit4_clustered_se, fit5_clustered_se, fit6_clustered_se, digits = 3, single.row = T)

# RC with alternative DV: cutoffs depending on Joint Probability (continuous re-election prospects)
# fifth table in appendix D

# Sample for this check. A row is kept only if both conditions hold:
#   * it is not one of the listed states in federal elections 13 or 14;
#   * it is not an AfD observation in federal election 19.
# (The state list previously contained "SN" twice; the duplicate is dropped,
# which does not change which rows are kept.)
landtag_subset2 <- landtag_subset %>%
  dplyr::filter(
    which_federal_election %notin% c(13, 14) |
      state %notin% c("BB", "BE", "MV", "SN", "ST", "TH"),
    which_federal_election %notin% c(19) |
      party_elected_short %notin% c("AfD")
  )

# Candidates of the two residual party groups who ran but have no joint_prob
# get the value 0.0001 instead of NA.
landtag_subset2$joint_prob[with(
  landtag_subset2,
  ran == 1 &
    party_elected_short %in% c("Minor Parties",
                               "Other Rad. Right (DVU/NPD/REP)") &
    is.na(joint_prob)
)] <- 0.0001

# Row identifier used as the dfidx index below: entry id plus election number.
landtag_subset2$index <- paste(
  landtag_subset2$entry_id,
  landtag_subset2$which_federal_election
)

# Alternative DV with a 0.5 cutoff on joint_prob.
# Rules are listed in order of precedence (first match wins), reproducing the
# earlier sequence of overwriting assignments: any row with a non-missing
# joint_prob is classified by the cutoff; of the remaining rows, those with
# ran == 0 are "Didn't Run"; everything else is NA.
# case_when treats an NA condition as "no match", so missing values are
# handled explicitly rather than through NA-containing logical subscripts.
landtag_subset2$ran_type_50 <- with(landtag_subset2, dplyr::case_when(
  joint_prob >= 0.5 ~ "Ran Secure",
  joint_prob < 0.5 ~ "Ran Insecure",
  ran == 0 ~ "Didn't Run",
  TRUE ~ NA_character_
))

# Full sample, indexed for mlogit.
data <- dfidx(
  data = landtag_subset2,
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_50",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit7_50 <- mlogit(
  ran_type_50 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data,
  tol = 0.00001,
  unscaled = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)
summary(fit7_50)

# Same model on the sample without the states HH, HB and SL.
data2 <- dfidx(
  data = subset(landtag_subset2, state %notin% c("HH", "HB", "SL")),
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_50",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit8_50 <- mlogit(
  ran_type_50 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data2,
  tol = 0.00001,
  unscaled = TRUE
)
summary(fit8_50)

# Alternative DV with a 0.35 cutoff on joint_prob.
# Rules are listed in order of precedence (first match wins), reproducing the
# earlier sequence of overwriting assignments: any row with a non-missing
# joint_prob is classified by the cutoff; of the remaining rows, those with
# ran == 0 are "Didn't Run"; everything else is NA.
# case_when treats an NA condition as "no match", so missing values are
# handled explicitly rather than through NA-containing logical subscripts.
landtag_subset2$ran_type_35 <- with(landtag_subset2, dplyr::case_when(
  joint_prob >= 0.35 ~ "Ran Secure",
  joint_prob < 0.35 ~ "Ran Insecure",
  ran == 0 ~ "Didn't Run",
  TRUE ~ NA_character_
))

# Full sample, indexed for mlogit.
data <- dfidx(
  data = landtag_subset2,
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_35",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit7_35 <- mlogit(
  ran_type_35 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data,
  tol = 0.00001,
  unscaled = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)
summary(fit7_35)

# Same model on the sample without the states HH, HB and SL.
data2 <- dfidx(
  data = subset(landtag_subset2, state %notin% c("HH", "HB", "SL")),
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_35",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit8_35 <- mlogit(
  ran_type_35 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data2,
  tol = 0.00001,
  unscaled = TRUE
)
summary(fit8_35)

# Alternative DV with a 0.65 cutoff on joint_prob.
# Rules are listed in order of precedence (first match wins), reproducing the
# earlier sequence of overwriting assignments: any row with a non-missing
# joint_prob is classified by the cutoff; of the remaining rows, those with
# ran == 0 are "Didn't Run"; everything else is NA.
# case_when treats an NA condition as "no match", so missing values are
# handled explicitly rather than through NA-containing logical subscripts.
landtag_subset2$ran_type_65 <- with(landtag_subset2, dplyr::case_when(
  joint_prob >= 0.65 ~ "Ran Secure",
  joint_prob < 0.65 ~ "Ran Insecure",
  ran == 0 ~ "Didn't Run",
  TRUE ~ NA_character_
))

# Full sample, indexed for mlogit.
data <- dfidx(
  data = landtag_subset2,
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_65",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit7_65 <- mlogit(
  ran_type_65 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data,
  tol = 0.00001,
  unscaled = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)
summary(fit7_65)

# Same model on the sample without the states HH, HB and SL.
data2 <- dfidx(
  data = subset(landtag_subset2, state %notin% c("HH", "HB", "SL")),
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_65",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit8_65 <- mlogit(
  ran_type_65 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data2,
  tol = 0.00001,
  unscaled = TRUE
)
summary(fit8_65)

# Alternative DV with a 0.10 cutoff on joint_prob.
# Rules are listed in order of precedence (first match wins), reproducing the
# earlier sequence of overwriting assignments: any row with a non-missing
# joint_prob is classified by the cutoff; of the remaining rows, those with
# ran == 0 are "Didn't Run"; everything else is NA.
# case_when treats an NA condition as "no match", so missing values are
# handled explicitly rather than through NA-containing logical subscripts.
landtag_subset2$ran_type_10 <- with(landtag_subset2, dplyr::case_when(
  joint_prob >= 0.10 ~ "Ran Secure",
  joint_prob < 0.10 ~ "Ran Insecure",
  ran == 0 ~ "Didn't Run",
  TRUE ~ NA_character_
))

# Full sample, indexed for mlogit.
data <- dfidx(
  data = landtag_subset2,
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_10",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit7_10 <- mlogit(
  ran_type_10 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data,
  tol = 0.00001,
  unscaled = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)
summary(fit7_10)

# Same model on the sample without the states HH, HB and SL.
data2 <- dfidx(
  data = subset(landtag_subset2, state %notin% c("HH", "HB", "SL")),
  idx = list(c("index", "which_federal_election")),
  choice = "ran_type_10",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Secure", "Ran Insecure")
)

fit8_10 <- mlogit(
  ran_type_10 ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv + I(time_since_last_election / 365) +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data2,
  tol = 0.00001,
  unscaled = TRUE
)
summary(fit8_10)

# Row identifier used as the dfidx index below: entry id plus election number.
landtag_subset$index <- paste(
  landtag_subset$entry_id,
  landtag_subset$which_federal_election
)

# Outcome-based DV: "Ran Succesfully" when moved_up == 1, "Ran Unsuccessfully"
# when moved_up == 0 and ran == 1, and "Didn't Run" for every other row.
# The label spelling is kept as is because it is matched against alt.levels.
# NOTE(review): rows with ran == 1 but a missing moved_up also end up as
# "Didn't Run" -- confirm that moved_up is never NA for candidates who ran.
landtag_subset$ran_outcome <- with(landtag_subset, dplyr::case_when(
  moved_up == 1 ~ "Ran Succesfully",
  moved_up == 0 & ran == 1 ~ "Ran Unsuccessfully",
  TRUE ~ "Didn't Run"
))

# Full sample, indexed for mlogit.
data <- dfidx(
  data = landtag_subset,
  idx = list(c("index", "which_federal_election")),
  choice = "ran_outcome",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Succesfully", "Ran Unsuccessfully")
)

fit7_movedup <- mlogit(
  ran_outcome ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv +
    time_since_last_election_years +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data,
  tol = 0.00001,
  unscaled = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)
summary(fit7_movedup)

# Same model on the sample without the states HH, HB and SL.
data2 <- dfidx(
  data = subset(landtag_subset, state %notin% c("HH", "HB", "SL")),
  idx = list(c("index", "which_federal_election")),
  choice = "ran_outcome",
  shape = "wide",
  alt.levels = c("Didn't Run", "Ran Succesfully", "Ran Unsuccessfully")
)

fit8_movedup <- mlogit(
  ran_outcome ~ 1 | tier_dummy +
    exec_position +
    legislative_position +
    party_position +
    pty_in_govt +
    age + I(age^2) +
    seniority + I(seniority^2) +
    pv +
    time_since_last_election_years +
    gender +
    party_elected_short +
    state +
    as.factor(which_federal_election),
  data = data2,
  tol = 0.00001,
  unscaled = TRUE
)
summary(fit8_movedup)

# Coefficient tests based on the sandwich covariance matrix for every
# alternative-DV model (cutoffs 0.10, 0.35, 0.50, 0.65 and the outcome DV).
fit7_10_rse <- coeftest(fit7_10, vcov. = sandwich(fit7_10))
fit8_10_rse <- coeftest(fit8_10, vcov. = sandwich(fit8_10))

fit7_35_rse <- coeftest(fit7_35, vcov. = sandwich(fit7_35))
fit8_35_rse <- coeftest(fit8_35, vcov. = sandwich(fit8_35))

fit7_50_rse <- coeftest(fit7_50, vcov. = sandwich(fit7_50))
fit8_50_rse <- coeftest(fit8_50, vcov. = sandwich(fit8_50))

fit7_65_rse <- coeftest(fit7_65, vcov. = sandwich(fit7_65))
fit8_65_rse <- coeftest(fit8_65, vcov. = sandwich(fit8_65))

fit7_movedup_rse <- coeftest(fit7_movedup, vcov. = sandwich(fit7_movedup))
fit8_movedup_rse <- coeftest(fit8_movedup, vcov. = sandwich(fit8_movedup))

# One table per DV, restricted to the tier_dummy coefficients. Each model is
# passed twice, so it fills two columns of the table.
stargazer(
  fit7_movedup_rse, fit7_movedup_rse, fit8_movedup_rse, fit8_movedup_rse,
  digits = 3, keep = "tier_dummy"
)
stargazer(
  fit7_10_rse, fit7_10_rse, fit8_10_rse, fit8_10_rse,
  digits = 3, keep = "tier_dummy"
)
stargazer(
  fit7_35_rse, fit7_35_rse, fit8_35_rse, fit8_35_rse,
  digits = 3, keep = "tier_dummy"
)
stargazer(
  fit7_50_rse, fit7_50_rse, fit8_50_rse, fit8_50_rse,
  digits = 3, keep = "tier_dummy"
)
stargazer(
  fit7_65_rse, fit7_65_rse, fit8_65_rse, fit8_65_rse,
  digits = 3, keep = "tier_dummy"
)

